Maven Clover report

Clover coverage report - Maven Clover report

Coverage timestamp: Tue Aug 1 2006 15:09:51 CEST

FRAMES NO FRAMES

file stats:	LOC:	1,007		Methods:	6
	NCLOC:	713		Classes:	3

Source file

Conditionals

Statements

Methods

TOTAL

EncodingUtils.java

47.3%

47.5%

33.3%

47.1%

1		/*
2		* Java HTML Tidy - JTidy
3		* HTML parser and pretty printer
4		*
5		* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6		* Institute of Technology, Institut National de Recherche en
7		* Informatique et en Automatique, Keio University). All Rights
8		* Reserved.
9		*
10		* Contributing Author(s):
11		*
12		* Dave Raggett <dsr@w3.org>
13		* Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14		* Gary L Peskin <garyp@firstech.com> (Java development)
15		* Sami Lempinen <sami@lempinen.net> (release management)
16		* Fabrizio Giustina <fgiust at users.sourceforge.net>
17		*
18		* The contributing author(s) would like to thank all those who
19		* helped with testing, bug fixes, and patience. This wouldn't
20		* have been possible without all of you.
21		*
22		* COPYRIGHT NOTICE:
23		*
24		* This software and documentation is provided "as is," and
25		* the copyright holders and contributing author(s) make no
26		* representations or warranties, express or implied, including
27		* but not limited to, warranties of merchantability or fitness
28		* for any particular purpose or that the use of the software or
29		* documentation will not infringe any third party patents,
30		* copyrights, trademarks or other rights.
31		*
32		* The copyright holders and contributing author(s) will not be
33		* liable for any direct, indirect, special or consequential damages
34		* arising out of any use of the software or documentation, even if
35		* advised of the possibility of such damage.
36		*
37		* Permission is hereby granted to use, copy, modify, and distribute
38		* this source code, or portions hereof, documentation and executables,
39		* for any purpose, without fee, subject to the following restrictions:
40		*
41		* 1. The origin of this source code must not be misrepresented.
42		* 2. Altered versions must be plainly marked as such and must
43		* not be misrepresented as being the original source.
44		* 3. This Copyright notice may not be removed or altered from any
45		* source or altered source distribution.
46		*
47		* The copyright holders and contributing author(s) specifically
48		* permit, without fee, and encourage the use of this source code
49		* as a component for supporting the Hypertext Markup Language in
50		* commercial products. If you use this source code in a product,
51		* acknowledgment is not required but would be appreciated.
52		*
53		*/
54		package org.w3c.tidy;
55
56		/**
57		* @author Fabrizio Giustina
58		* @version $Revision: 779 $ ($Author: fgiust $)
59		*/
60		public final class EncodingUtils
61		{
62
63		/**
64		* the big-endian (default) UNICODE BOM.
65		*/
66		public static final int UNICODE_BOM_BE = 0xFEFF;
67
68		/**
69		* the default (big-endian) UNICODE BOM.
70		*/
71		public static final int UNICODE_BOM = UNICODE_BOM_BE;
72
73		/**
74		* the little-endian UNICODE BOM.
75		*/
76		public static final int UNICODE_BOM_LE = 0xFFFE;
77
78		/**
79		* the UTF-8 UNICODE BOM.
80		*/
81		public static final int UNICODE_BOM_UTF8 = 0xEFBBBF;
82
83		/**
84		* States for ISO 2022. A document in an ISO-2022 based encoding uses ESC sequences called "designators" to switch
85		* character sets. The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646 variants, and
86		* "ESC" + "$" + ? and "ESC" + "$" + "(" + ? for multibyte character sets. State ASCII.
87		*/
88		public static final int FSM_ASCII = 0;
89
90		/**
91		* state ESC.
92		*/
93		public static final int FSM_ESC = 1;
94
95		/**
96		* state ESCD.
97		*/
98		public static final int FSM_ESCD = 2;
99
100		/**
101		* state ESCDP.
102		*/
103		public static final int FSM_ESCDP = 3;
104
105		/**
106		* state ESCP.
107		*/
108		public static final int FSM_ESCP = 4;
109
110		/**
111		* state NONASCII.
112		*/
113		public static final int FSM_NONASCII = 5;
114
115		/**
116		* Max UTF-8 valid char value.
117		*/
118		public static final int MAX_UTF8_FROM_UCS4 = 0x10FFFF;
119
120		/**
121		* Max UTF-16 value.
122		*/
123		public static final int MAX_UTF16_FROM_UCS4 = 0x10FFFF;
124
125		/**
126		* UTF-16 low surrogate.
127		*/
128		public static final int LOW_UTF16_SURROGATE = 0xD800;
129
130		/**
131		* UTF-16 surrogates begin.
132		*/
133		public static final int UTF16_SURROGATES_BEGIN = 0x10000;
134
135		/**
136		* UTF-16 surrogate pair areas: low surrogates begin.
137		*/
138		public static final int UTF16_LOW_SURROGATE_BEGIN = 0xD800;
139
140		/**
141		* UTF-16 surrogate pair areas: low surrogates end.
142		*/
143		public static final int UTF16_LOW_SURROGATE_END = 0xDBFF;
144
145		/**
146		* UTF-16 surrogate pair areas: high surrogates begin.
147		*/
148		public static final int UTF16_HIGH_SURROGATE_BEGIN = 0xDC00;
149
150		/**
151		* UTF-16 surrogate pair areas: high surrogates end.
152		*/
153		public static final int UTF16_HIGH_SURROGATE_END = 0xDFFF;
154
155		/**
156		* UTF-16 high surrogate.
157		*/
158		public static final int HIGH_UTF16_SURROGATE = 0xDFFF;
159
160		/**
161		* UTF-8 byte swap: invalid char.
162		*/
163		private static final int UTF8_BYTE_SWAP_NOT_A_CHAR = 0xFFFE;
164
165		/**
166		* UTF-8 invalid char.
167		*/
168		private static final int UTF8_NOT_A_CHAR = 0xFFFF;
169
170		/**
171		* Mapping for Windows Western character set (128-159) to Unicode.
172		*/
173		private static final int[] WIN2UNICODE = {
174		0x20AC,
175		0x0000,
176		0x201A,
177		0x0192,
178		0x201E,
179		0x2026,
180		0x2020,
181		0x2021,
182		0x02C6,
183		0x2030,
184		0x0160,
185		0x2039,
186		0x0152,
187		0x0000,
188		0x017D,
189		0x0000,
190		0x0000,
191		0x2018,
192		0x2019,
193		0x201C,
194		0x201D,
195		0x2022,
196		0x2013,
197		0x2014,
198		0x02DC,
199		0x2122,
200		0x0161,
201		0x203A,
202		0x0153,
203		0x0000,
204		0x017E,
205		0x0178};
206
207		/**
208		* John Love-Jensen contributed this table for mapping MacRoman character set to Unicode.
209		*/
210		private static final int[] MAC2UNICODE = { // modified to only need chars 128-255/U+0080-U+00FF Terry T 19 Aug 01
211		// x7F = DEL
212		0x00C4,
213		0x00C5,
214		0x00C7,
215		0x00C9,
216		0x00D1,
217		0x00D6,
218		0x00DC,
219		0x00E1,
220		0x00E0,
221		0x00E2,
222		0x00E4,
223		0x00E3,
224		0x00E5,
225		0x00E7,
226		0x00E9,
227		0x00E8,
228		0x00EA,
229		0x00EB,
230		0x00ED,
231		0x00EC,
232		0x00EE,
233		0x00EF,
234		0x00F1,
235		0x00F3,
236		0x00F2,
237		0x00F4,
238		0x00F6,
239		0x00F5,
240		0x00FA,
241		0x00F9,
242		0x00FB,
243		0x00FC,
244		0x2020,
245		0x00B0,
246		0x00A2,
247		0x00A3,
248		0x00A7,
249		0x2022,
250		0x00B6,
251		0x00DF,
252		0x00AE,
253		0x00A9,
254		0x2122,
255		0x00B4,
256		0x00A8,
257		0x2260,
258		0x00C6,
259		0x00D8,
260		0x221E,
261		0x00B1,
262		0x2264,
263		0x2265,
264		0x00A5,
265		0x00B5,
266		0x2202,
267		0x2211,
268		// =BD U+2126 OHM SIGN
269		0x220F,
270		0x03C0,
271		0x222B,
272		0x00AA,
273		0x00BA,
274		0x03A9,
275		0x00E6,
276		0x00F8,
277		0x00BF,
278		0x00A1,
279		0x00AC,
280		0x221A,
281		0x0192,
282		0x2248,
283		0x2206,
284		0x00AB,
285		0x00BB,
286		0x2026,
287		0x00A0,
288		0x00C0,
289		0x00C3,
290		0x00D5,
291		0x0152,
292		0x0153,
293		0x2013,
294		0x2014,
295		0x201C,
296		0x201D,
297		0x2018,
298		0x2019,
299		0x00F7,
300		0x25CA,
301		// =DB U+00A4 CURRENCY SIGN
302		0x00FF,
303		0x0178,
304		0x2044,
305		0x20AC,
306		0x2039,
307		0x203A,
308		0xFB01,
309		0xFB02,
310		0x2021,
311		0x00B7,
312		0x201A,
313		0x201E,
314		0x2030,
315		0x00C2,
316		0x00CA,
317		0x00C1,
318		0x00CB,
319		0x00C8,
320		0x00CD,
321		0x00CE,
322		0x00CF,
323		0x00CC,
324		0x00D3,
325		0x00D4,
326		// xF0 = Apple Logo
327		// =F0 U+2665 BLACK HEART SUIT
328		0xF8FF,
329		0x00D2,
330		0x00DA,
331		0x00DB,
332		0x00D9,
333		0x0131,
334		0x02C6,
335		0x02DC,
336		0x00AF,
337		0x02D8,
338		0x02D9,
339		0x02DA,
340		0x00B8,
341		0x02DD,
342		0x02DB,
343		0x02C7};
344
345		/**
346		* table to map symbol font characters to Unicode; undefined characters are mapped to 0x0000 and characters without
347		* any unicode equivalent are mapped to '?'. Is this appropriate?
348		*/
349		private static final int[] SYMBOL2UNICODE = {
350		0x0000,
351		0x0001,
352		0x0002,
353		0x0003,
354		0x0004,
355		0x0005,
356		0x0006,
357		0x0007,
358		0x0008,
359		0x0009,
360		0x000A,
361		0x000B,
362		0x000C,
363		0x000D,
364		0x000E,
365		0x000F,
366
367		0x0010,
368		0x0011,
369		0x0012,
370		0x0013,
371		0x0014,
372		0x0015,
373		0x0016,
374		0x0017,
375		0x0018,
376		0x0019,
377		0x001A,
378		0x001B,
379		0x001C,
380		0x001D,
381		0x001E,
382		0x001F,
383
384		0x0020,
385		0x0021,
386		0x2200,
387		0x0023,
388		0x2203,
389		0x0025,
390		0x0026,
391		0x220D,
392		0x0028,
393		0x0029,
394		0x2217,
395		0x002B,
396		0x002C,
397		0x2212,
398		0x002E,
399		0x002F,
400
401		0x0030,
402		0x0031,
403		0x0032,
404		0x0033,
405		0x0034,
406		0x0035,
407		0x0036,
408		0x0037,
409		0x0038,
410		0x0039,
411		0x003A,
412		0x003B,
413		0x003C,
414		0x003D,
415		0x003E,
416		0x003F,
417
418		0x2245,
419		0x0391,
420		0x0392,
421		0x03A7,
422		0x0394,
423		0x0395,
424		0x03A6,
425		0x0393,
426		0x0397,
427		0x0399,
428		0x03D1,
429		0x039A,
430		0x039B,
431		0x039C,
432		0x039D,
433		0x039F,
434
435		0x03A0,
436		0x0398,
437		0x03A1,
438		0x03A3,
439		0x03A4,
440		0x03A5,
441		0x03C2,
442		0x03A9,
443		0x039E,
444		0x03A8,
445		0x0396,
446		0x005B,
447		0x2234,
448		0x005D,
449		0x22A5,
450		0x005F,
451
452		0x00AF,
453		0x03B1,
454		0x03B2,
455		0x03C7,
456		0x03B4,
457		0x03B5,
458		0x03C6,
459		0x03B3,
460		0x03B7,
461		0x03B9,
462		0x03D5,
463		0x03BA,
464		0x03BB,
465		0x03BC,
466		0x03BD,
467		0x03BF,
468
469		0x03C0,
470		0x03B8,
471		0x03C1,
472		0x03C3,
473		0x03C4,
474		0x03C5,
475		0x03D6,
476		0x03C9,
477		0x03BE,
478		0x03C8,
479		0x03B6,
480		0x007B,
481		0x007C,
482		0x007D,
483		0x223C,
484		0x003F,
485
486		0x0000,
487		0x0000,
488		0x0000,
489		0x0000,
490		0x0000,
491		0x0000,
492		0x0000,
493		0x0000,
494		0x0000,
495		0x0000,
496		0x0000,
497		0x0000,
498		0x0000,
499		0x0000,
500		0x0000,
501		0x0000,
502
503		0x0000,
504		0x0000,
505		0x0000,
506		0x0000,
507		0x0000,
508		0x0000,
509		0x0000,
510		0x0000,
511		0x0000,
512		0x0000,
513		0x0000,
514		0x0000,
515		0x0000,
516		0x0000,
517		0x0000,
518		0x0000,
519
520		0x00A0,
521		0x03D2,
522		0x2032,
523		0x2264,
524		0x2044,
525		0x221E,
526		0x0192,
527		0x2663,
528		0x2666,
529		0x2665,
530		0x2660,
531		0x2194,
532		0x2190,
533		0x2191,
534		0x2192,
535		0x2193,
536
537		0x00B0,
538		0x00B1,
539		0x2033,
540		0x2265,
541		0x00D7,
542		0x221D,
543		0x2202,
544		0x00B7,
545		0x00F7,
546		0x2260,
547		0x2261,
548		0x2248,
549		0x2026,
550		0x003F,
551		0x003F,
552		0x21B5,
553
554		0x2135,
555		0x2111,
556		0x211C,
557		0x2118,
558		0x2297,
559		0x2295,
560		0x2205,
561		0x2229,
562		0x222A,
563		0x2283,
564		0x2287,
565		0x2284,
566		0x2282,
567		0x2286,
568		0x2208,
569		0x2209,
570
571		0x2220,
572		0x2207,
573		0x00AE,
574		0x00A9,
575		0x2122,
576		0x220F,
577		0x221A,
578		0x22C5,
579		0x00AC,
580		0x2227,
581		0x2228,
582		0x21D4,
583		0x21D0,
584		0x21D1,
585		0x21D2,
586		0x21D3,
587
588		0x25CA,
589		0x2329,
590		0x00AE,
591		0x00A9,
592		0x2122,
593		0x2211,
594		0x003F,
595		0x003F,
596		0x003F,
597		0x003F,
598		0x003F,
599		0x003F,
600		0x003F,
601		0x003F,
602		0x003F,
603		0x003F,
604
605		0x20AC,
606		0x232A,
607		0x222B,
608		0x2320,
609		0x003F,
610		0x2321,
611		0x003F,
612		0x003F,
613		0x003F,
614		0x003F,
615		0x003F,
616		0x003F,
617		0x003F,
618		0x003F,
619		0x003F,
620		0x003F};
621
622		/**
623		* Array of valid UTF8 sequences.
624		*/
625		private static final ValidUTF8Sequence[] VALID_UTF8 = {
626		new ValidUTF8Sequence(0x0000, 0x007F, 1, new char[]{0x00, 0x7F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}),
627		new ValidUTF8Sequence(0x0080, 0x07FF, 2, new char[]{0xC2, 0xDF, 0x80, 0xBF, 0x00, 0x00, 0x00, 0x00}),
628		new ValidUTF8Sequence(0x0800, 0x0FFF, 3, new char[]{0xE0, 0xE0, 0xA0, 0xBF, 0x80, 0xBF, 0x00, 0x00}),
629		new ValidUTF8Sequence(0x1000, 0xFFFF, 3, new char[]{0xE1, 0xEF, 0x80, 0xBF, 0x80, 0xBF, 0x00, 0x00}),
630		new ValidUTF8Sequence(0x10000, 0x3FFFF, 4, new char[]{0xF0, 0xF0, 0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}),
631		new ValidUTF8Sequence(0x40000, 0xFFFFF, 4, new char[]{0xF1, 0xF3, 0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}),
632		new ValidUTF8Sequence(0x100000, 0x10FFFF, 4, new char[]{0xF4, 0xF4, 0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})};
633
634		/**
635		* Number of valid UTF-8 sequences.
636		*/
637		private static final int NUM_UTF8_SEQUENCES = VALID_UTF8.length;
638
639		/**
640		* Offset for utf8 sequences.
641		*/
642		private static final int[] OFFSET_UTF8_SEQUENCES = {0, // 1 byte
643		1, // 2 bytes
644		2, // 3 bytes
645		4, // 4 bytes
646		NUM_UTF8_SEQUENCES}; // must be last
647
648		/**
649		* Private constructor: this class only exposes static members and is not meant to be instantiated.
650		*/
651	0	private EncodingUtils()
652		{
653		// intentionally empty
654		}
655
656		/**
657		* Converts a Windows-1252 char in the range 128-159 to Unicode by looking it up in WIN2UNICODE.
658		* @param c char to decode; no range check is done, so values outside 128-159 index outside the 32-entry table
659		* @return decoded char (0x0000 for the positions the table leaves unmapped)
660		*/
661	0	protected static int decodeWin1252(int c)
662		{
663	0	return WIN2UNICODE[c - 128]; // table index 0 corresponds to char 128
664		}
665
666		/**
667		* Converts a MacRoman char to Unicode: values above 127 are mapped through MAC2UNICODE, others are returned as is.
668		* @param c char to decode; there is no upper bound check, so values above 255 index outside the 128-entry table
669		* @return decoded char
670		*/
671	0	protected static int decodeMacRoman(int c)
672		{
673	0	if (127 < c)
674		{
675	0	c = MAC2UNICODE[c - 128]; // table index 0 corresponds to char 128
676		}
677	0	return c;
678		}
679
680		/**
681		* Converts a Symbol Font char to Unicode through the 256-entry SYMBOL2UNICODE table.
682		* @param c char to decode; values above 255 are returned unchanged, negative values are not checked
683		* @return decoded char
684		*/
685	0	static int decodeSymbolFont(int c)
686		{
687	0	if (c > 255)
688		{
689	0	return c; // outside the table: pass through untouched
690		}
691
692	0	return SYMBOL2UNICODE[c];
693		}
694
695		/**
696		* Decodes one UTF-8 byte sequence (a first byte plus its successor bytes) to a single char value.
697		* @param c one-element output array: c[0] receives the decoded char (or firstByte itself for END_OF_STREAM)
698		* @param firstByte first input byte, or StreamIn.END_OF_STREAM
699		* @param successorBytes array containing successor bytes. NOTE(review): described as nullable when a getter is provided, but the body reads successorBytes.length unconditionally - confirm
700		* @param getter callback used to get (or unget) one byte at a time; may be null
701		* @param count one-element output array: count[0] receives the number of bytes read
702		* @param startInSuccessorBytesArray index in successorBytes of the first successor byte
703		* @return <code>true</code> if error, <code>false</code> otherwise
704		*/
705	1025	static boolean decodeUTF8BytesToChar(int[] c, int firstByte, byte[] successorBytes, GetBytes getter, int[] count,
706		int startInSuccessorBytesArray)
707		{
708	1025	byte[] buf = new byte[10];
709
710	1025	int ch = 0;
711	1025	int n = 0;
712	1025	int i, bytes = 0;
713	1025	boolean hasError = false;
714
715	1025	if (successorBytes.length != 0) // NOTE(review): throws NullPointerException for a null array - confirm callers never pass null
716		{
717	1025	buf = successorBytes;
718		}
719
720		// special check if we have been passed an EOF char
721	1025	if (firstByte == StreamIn.END_OF_STREAM) //uint
722		{
723		// at present EOF is handed back unchanged and is not reported as an error
724	0	c[0] = firstByte;
725	0	count[0] = 1;
726	0	return false;
727		}
728
729	1025	ch = TidyUtils.toUnsigned(firstByte); // first byte is passed in separately
730
731		// the lead byte's high bits give the sequence length; its remaining low bits start the value n
732	1025	if (ch <= 0x7F) // 0XXX XXXX one byte
733		{
734	3	n = ch;
735	3	bytes = 1;
736		}
737	1022	else if ((ch & 0xE0) == 0xC0) /* 110X XXXX two bytes */
738		{
739	912	n = ch & 31;
740	912	bytes = 2;
741		}
742	110	else if ((ch & 0xF0) == 0xE0) /* 1110 XXXX three bytes */
743		{
744	110	n = ch & 15;
745	110	bytes = 3;
746		}
747	0	else if ((ch & 0xF8) == 0xF0) /* 1111 0XXX four bytes */
748		{
749	0	n = ch & 7;
750	0	bytes = 4;
751		}
752	0	else if ((ch & 0xFC) == 0xF8) /* 1111 10XX five bytes: decoded but always flagged as an error */
753		{
754	0	n = ch & 3;
755	0	bytes = 5;
756	0	hasError = true;
757		}
758	0	else if ((ch & 0xFE) == 0xFC) /* 1111 110X six bytes: decoded but always flagged as an error */
759		{
760	0	n = ch & 1;
761	0	bytes = 6;
762	0	hasError = true;
763		}
764		else
765		{
766		// not a valid first byte of a UTF-8 sequence
767	0	n = ch;
768	0	bytes = 1;
769	0	hasError = true;
770		}
771
772		// fold each successor byte's low six bits into n
773	1025	for (i = 1; i < bytes; ++i)
774		{
775	1132	int[] tempCount = new int[1]; // no. of additional bytes to get
776
777		// successor bytes should have the form 10XX XXXX
778	1132	if (getter != null && (bytes - i > 0))
779		{
780	0	tempCount[0] = 1; // to simplify things, get 1 byte at a time
781	0	int[] buftocopy = new int[]{buf[startInSuccessorBytesArray + i - 1]};
782
783	0	getter.doGet(buftocopy, tempCount, false); // NOTE(review): the result lands in buftocopy and is not copied back into buf - confirm intended
784		//readRawBytesFromStream(buftocopy, tempCount, false);
785	0	if (tempCount[0] <= 0) // EOF
786		{
787	0	hasError = true;
788	0	bytes = i;
789	0	break;
790		}
791		}
792
793	1132	if ((buf[startInSuccessorBytesArray + i - 1] & 0xC0) != 0x80)
794		{
795		// illegal successor byte value
796	0	hasError = true;
797	0	bytes = i;
798	0	if (getter != null)
799		{
800	0	int[] buftocopy = new int[]{buf[startInSuccessorBytesArray + i - 1]};
801	0	tempCount[0] = 1; // to simplify things, unget 1 byte at a time
802	0	getter.doGet(buftocopy, tempCount, true);
803		}
804	0	break;
805		}
806
807	1132	n = (n << 6) \| (buf[startInSuccessorBytesArray + i - 1] & 0x3F);
808		}
809
810		// 0xFFFE and 0xFFFF are rejected as non-characters
811	1025	if (!hasError && ((n == UTF8_BYTE_SWAP_NOT_A_CHAR) \|\| (n == UTF8_NOT_A_CHAR)))
812		{
813	0	hasError = true;
814		}
815
816	1025	if (!hasError && (n > MAX_UTF8_FROM_UCS4))
817		{
818	0	hasError = true;
819		}
820
821	1025	if (!hasError && (n >= UTF16_LOW_SURROGATE_BEGIN) && (n <= UTF16_HIGH_SURROGATE_END))
822		{
823		// unpaired surrogates not allowed
824	0	hasError = true;
825		}
826
827	1025	if (!hasError)
828		{
829		// lo..hi is the slice of VALID_UTF8 entries that describe sequences of this length
830	1025	int lo = OFFSET_UTF8_SEQUENCES[bytes - 1];
831	1025	int hi = OFFSET_UTF8_SEQUENCES[bytes] - 1;
832
833		// check for overlong sequences
834	1025	if ((n < VALID_UTF8[lo].lowChar) \|\| (n > VALID_UTF8[hi].highChar))
835		{
836	0	hasError = true;
837		}
838		else
839		{
840	1025	hasError = true; // assume error until proven otherwise. NOTE(review): one in-range byte below clears the flag and nothing sets it again - confirm intended
841
842	1025	for (i = lo; i <= hi; i++)
843		{
844	1135	int tempCount;
845	1135	char theByte; //unsigned
846
847	1135	for (tempCount = 0; tempCount < bytes; tempCount++)
848		{
849	2291	if (!TidyUtils.toBoolean(tempCount)) // presumably true only for tempCount == 0, i.e. the first byte - verify against TidyUtils
850		{
851	1135	theByte = (char) firstByte;
852		}
853		else
854		{
855	1156	theByte = (char) buf[startInSuccessorBytesArray + tempCount - 1];
856		}
857		// validBytes holds a (min, max) pair for each byte position of the sequence
858	2291	if ((theByte >= VALID_UTF8[i].validBytes[(tempCount * 2)])
859		&& (theByte <= VALID_UTF8[i].validBytes[(tempCount * 2) + 1]))
860		{
861	1025	hasError = false;
862		}
863	2291	if (hasError)
864		{
865	98	break;
866		}
867		}
868		}
869		}
870		}
871
872		// both outputs are written even when an error is reported
873	1025	count[0] = bytes;
874
875	1025	c[0] = n;
876
877		// n = 0xFFFD;
878		// replacement char - do this in the caller
879	1025	return hasError;
880
881		}
876
877		/**
878		* Encodes a char as a UTF-8 byte sequence and optionally hands the bytes to a putter.
879		* @param c char to encode
880		* @param encodebuf will contain the encoded bytes; if null a local 10-byte scratch buffer is used instead
881		* @param putter if not null it will be called to write the bytes out, but only when no error was detected
882		* @param count one-element output array: count[0] receives the length of the encoded sequence
883		* @return <code>false</code>= ok, <code>true</code>= error
884		*/
885	331874	static boolean encodeCharToUTF8Bytes(int c, byte[] encodebuf, PutBytes putter, int[] count)
886		{
887	331874	int bytes = 0;
888
889	331874	byte[] buf = new byte[10];
890
891	331874	if (encodebuf != null)
892		{
893	331874	buf = encodebuf;
894		}
895
896	331874	boolean hasError = false;
897
898	331874	if (c <= 0x7F) // 0XXX XXXX one byte. NOTE(review): a negative c also takes this branch - confirm callers never pass one
899		{
900	319513	buf[0] = (byte) c;
901	319513	bytes = 1;
902		}
903	12361	else if (c <= 0x7FF) // 110X XXXX two bytes
904		{
905	12251	buf[0] = (byte) (0xC0 \| (c >> 6));
906	12251	buf[1] = (byte) (0x80 \| (c & 0x3F));
907	12251	bytes = 2;
908		}
909	110	else if (c <= 0xFFFF) // 1110 XXXX three bytes
910		{
911	110	buf[0] = (byte) (0xE0 \| (c >> 12));
912	110	buf[1] = (byte) (0x80 \| ((c >> 6) & 0x3F));
913	110	buf[2] = (byte) (0x80 \| (c & 0x3F));
914	110	bytes = 3;
915		// the bytes are still written to buf, but 0xFFFE/0xFFFF and surrogates are reported as errors
916	110	if ((c == UTF8_BYTE_SWAP_NOT_A_CHAR) \|\| (c == UTF8_NOT_A_CHAR))
917		{
918	0	hasError = true;
919		}
920	110	else if ((c >= UTF16_LOW_SURROGATE_BEGIN) && (c <= UTF16_HIGH_SURROGATE_END))
921		{
922		// unpaired surrogates not allowed
923	0	hasError = true;
924		}
925		}
926	0	else if (c <= 0x1FFFFF) // 1111 0XXX four bytes
927		{
928	0	buf[0] = (byte) (0xF0 \| (c >> 18));
929	0	buf[1] = (byte) (0x80 \| ((c >> 12) & 0x3F));
930	0	buf[2] = (byte) (0x80 \| ((c >> 6) & 0x3F));
931	0	buf[3] = (byte) (0x80 \| (c & 0x3F));
932	0	bytes = 4;
933	0	if (c > MAX_UTF8_FROM_UCS4)
934		{
935	0	hasError = true;
936		}
937		}
938	0	else if (c <= 0x3FFFFFF) // 1111 10XX five bytes: encoded but always flagged as an error
939		{
940	0	buf[0] = (byte) (0xF8 \| (c >> 24));
941	0	buf[1] = (byte) (0x80 \| (c >> 18)); // NOTE(review): unlike the sibling lines this value is not masked with 0x3F - confirm
942	0	buf[2] = (byte) (0x80 \| ((c >> 12) & 0x3F));
943	0	buf[3] = (byte) (0x80 \| ((c >> 6) & 0x3F));
944	0	buf[4] = (byte) (0x80 \| (c & 0x3F));
945	0	bytes = 5;
946	0	hasError = true;
947		}
948	0	else if (c <= 0x7FFFFFFF) // 1111 110X six bytes: encoded but always flagged as an error
949		{
950	0	buf[0] = (byte) (0xFC \| (c >> 30));
951	0	buf[1] = (byte) (0x80 \| ((c >> 24) & 0x3F));
952	0	buf[2] = (byte) (0x80 \| ((c >> 18) & 0x3F));
953	0	buf[3] = (byte) (0x80 \| ((c >> 12) & 0x3F));
954	0	buf[4] = (byte) (0x80 \| ((c >> 6) & 0x3F));
955	0	buf[5] = (byte) (0x80 \| (c & 0x3F));
956	0	bytes = 6;
957	0	hasError = true;
958		}
959		else
960		{
961	0	hasError = true;
962		}
963
964	331874	if (!hasError && putter != null) // don't output invalid UTF-8 byte sequence to a stream
965		{
966	0	int[] tempCount = new int[]{bytes};
967	0	putter.doPut(buf, tempCount);
968
969	0	if (tempCount[0] < bytes) // the putter reported fewer bytes than requested: treat as an error
970		{
971	0	hasError = true;
972		}
973		}
974
975	331874	count[0] = bytes;
976	331874	return hasError;
977		}
977
978		/**
979		* Getter callback: called to retrieve 1 or more additional UTF-8 bytes. The Getter callback can also unget if
980		* necessary to re-synchronize the input stream.
981		*/
982		static interface GetBytes
983		{
984
985		/**
986		* Get (or unget) one or more bytes.
987		* @param buf will contain the bytes.
988		* @param count on entry the number of bytes requested; on return the number of bytes actually stored in "buf", <= 0 if error or EOF
989		* @param unget <code>true</code> to unget the bytes in "buf" instead of getting new ones
990		*/
991		void doGet(int[] buf, int[] count, boolean unget);
992		}
993
994		/**
995		* Putter callback: called to store 1 or more additional UTF-8 bytes.
996		*/
997		static interface PutBytes
998		{
999
1000		/**
1001		* Store one or more bytes.
1002		* @param buf contains the bytes to store.
1003		* @param count on entry the number of bytes in "buf" to store; encodeCharToUTF8Bytes treats a smaller value after the call as an error
1004		*/
1005		void doPut(byte[] buf, int[] count);
1006		}
1007		}