1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55 package org.w3c.tidy;
56
57 /**
58 * Utility class with handy methods, mainly for String handling or for reproducing c behaviours.
59 * @author Fabrizio Giustina
60 * @version $Revision $ ($Author $)
61 */
62 public final class TidyUtils
63 {
64
/**
 * char type flag: decimal digit.
 */
private static final short DIGIT = 1;

/**
 * char type flag: letter.
 */
private static final short LETTER = 2;

/**
 * char type flag: character allowed in a name.
 */
private static final short NAMECHAR = 4;

/**
 * char type flag: whitespace.
 */
private static final short WHITE = 8;

/**
 * char type flag: newline.
 */
private static final short NEWLINE = 16;

/**
 * char type flag: lowercase letter.
 */
private static final short LOWERCASE = 32;

/**
 * char type flag: uppercase letter.
 */
private static final short UPPERCASE = 64;

/**
 * Lexical classification table, indexed by char code (0-127 only). Each entry is the bitwise OR of the char type
 * flags that apply to that character. The reference is final: the table is filled once by the static initializer
 * below and is never replaced.
 */
private static final short[] lexmap = new short[128];

static
{
    // newline characters also count as whitespace
    mapStr("\r\n\f", (short) (NEWLINE | WHITE));
    mapStr(" \t", WHITE);
    // punctuation that is accepted inside names
    mapStr("-.:_", NAMECHAR);
    mapStr("0123456789", (short) (DIGIT | NAMECHAR));
    mapStr("abcdefghijklmnopqrstuvwxyz", (short) (LOWERCASE | LETTER | NAMECHAR));
    mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short) (UPPERCASE | LETTER | NAMECHAR));
}
114
/**
 * Not instantiable: this class only exposes static helpers.
 */
private TidyUtils()
{
    // intentionally empty
}
122
123 /**
124 * Converts a int to a boolean.
125 * @param value int value
126 * @return <code>true</code> if value is != 0
127 */
128 static boolean toBoolean(int value)
129 {
130 return value != 0;
131 }
132
133 /**
134 * convert an int to unsigned (& 0xFF).
135 * @param c signed int
136 * @return unsigned int
137 */
138 static int toUnsigned(int c)
139 {
140 return c & 0xFF;
141 }
142
143 /**
144 * check if the first String contains the second one.
145 * @param s1 full String
146 * @param len1 maximum position in String
147 * @param s2 String to search for
148 * @return true if s1 contains s2 in the range 0-len1
149 */
150 static boolean wsubstrn(String s1, int len1, String s2)
151 {
152 int searchIndex = s1.indexOf(s2);
153 return searchIndex > -1 && searchIndex <= len1;
154 }
155
156 /**
157 * check if the first String contains the second one (ignore case).
158 * @param s1 full String
159 * @param len1 maximum position in String
160 * @param s2 String to search for
161 * @return true if s1 contains s2 in the range 0-len1
162 */
163 static boolean wsubstrncase(String s1, int len1, String s2)
164 {
165 return wsubstrn(s1.toLowerCase(), len1, s2.toLowerCase());
166 }
167
168 /**
169 * return offset of cc from beginning of s1, -1 if not found.
170 * @param s1 String
171 * @param len1 maximum offset (values > than lenl are ignored and returned as -1)
172 * @param cc character to search for
173 * @return index of cc in s1
174 */
175 static int wstrnchr(String s1, int len1, char cc)
176 {
177 int indexOf = s1.indexOf(cc);
178 if (indexOf < len1)
179 {
180 return indexOf;
181 }
182
183 return -1;
184 }
185
186 /**
187 * Same as wsubstrn, but without a specified length.
188 * @param s1 full String
189 * @param s2 String to search for
190 * @return <code>true</code> if s2 is found in s2 (case insensitive search)
191 */
192 static boolean wsubstr(String s1, String s2)
193 {
194 int i;
195 int len1 = s1.length();
196 int len2 = s2.length();
197
198 for (i = 0; i <= len1 - len2; ++i)
199 {
200 if (s2.equalsIgnoreCase(s1.substring(i)))
201 {
202 return true;
203 }
204 }
205
206 return false;
207 }
208
209 /**
210 * Is the character a hex digit?
211 * @param c char
212 * @return <code>true</code> if he given character is a hex digit
213 */
214 static boolean isxdigit(char c)
215 {
216 return Character.isDigit(c) || (Character.toLowerCase(c) >= 'a' && Character.toLowerCase(c) <= 'f');
217 }
218
219 /**
220 * Check if the string valueToCheck is contained in validValues array (case insesitie comparison).
221 * @param validValues array of valid values
222 * @param valueToCheck value to search for
223 * @return <code>true</code> if valueToCheck is found in validValues
224 */
225 static boolean isInValuesIgnoreCase(String[] validValues, String valueToCheck)
226 {
227 int len = validValues.length;
228 for (int j = 0; j < len; j++)
229 {
230 if (validValues[j].equalsIgnoreCase(valueToCheck))
231 {
232 return true;
233 }
234 }
235 return false;
236 }
237
238 /**
239 * Return true if substring s is in p and isn't all in upper case. This is used to check the case of SYSTEM, PUBLIC,
240 * DTD and EN.
241 * @param s substring
242 * @param p full string
243 * @param len how many chars to check in p
244 * @return true if substring s is in p and isn't all in upper case
245 */
246 public static boolean findBadSubString(String s, String p, int len)
247 {
248 int n = s.length();
249 int i = 0;
250 String ps;
251
252 while (n < len)
253 {
254 ps = p.substring(i, i + n);
255 if (s.equalsIgnoreCase(ps))
256 {
257 return (!ps.equals(s.substring(0, n)));
258 }
259
260 ++i;
261 --len;
262 }
263
264 return false;
265 }
266
267 /**
268 * Is the given char a valid xml letter?
269 * @param c char
270 * @return <code>true</code> if the char is a valid xml letter
271 */
272 static boolean isXMLLetter(char c)
273 {
274 return ((c >= 0x41 && c <= 0x5a)
275 || (c >= 0x61 && c <= 0x7a)
276 || (c >= 0xc0 && c <= 0xd6)
277 || (c >= 0xd8 && c <= 0xf6)
278 || (c >= 0xf8 && c <= 0xff)
279 || (c >= 0x100 && c <= 0x131)
280 || (c >= 0x134 && c <= 0x13e)
281 || (c >= 0x141 && c <= 0x148)
282 || (c >= 0x14a && c <= 0x17e)
283 || (c >= 0x180 && c <= 0x1c3)
284 || (c >= 0x1cd && c <= 0x1f0)
285 || (c >= 0x1f4 && c <= 0x1f5)
286 || (c >= 0x1fa && c <= 0x217)
287 || (c >= 0x250 && c <= 0x2a8)
288 || (c >= 0x2bb && c <= 0x2c1)
289 || c == 0x386
290 || (c >= 0x388 && c <= 0x38a)
291 || c == 0x38c
292 || (c >= 0x38e && c <= 0x3a1)
293 || (c >= 0x3a3 && c <= 0x3ce)
294 || (c >= 0x3d0 && c <= 0x3d6)
295 || c == 0x3da
296 || c == 0x3dc
297 || c == 0x3de
298 || c == 0x3e0
299 || (c >= 0x3e2 && c <= 0x3f3)
300 || (c >= 0x401 && c <= 0x40c)
301 || (c >= 0x40e && c <= 0x44f)
302 || (c >= 0x451 && c <= 0x45c)
303 || (c >= 0x45e && c <= 0x481)
304 || (c >= 0x490 && c <= 0x4c4)
305 || (c >= 0x4c7 && c <= 0x4c8)
306 || (c >= 0x4cb && c <= 0x4cc)
307 || (c >= 0x4d0 && c <= 0x4eb)
308 || (c >= 0x4ee && c <= 0x4f5)
309 || (c >= 0x4f8 && c <= 0x4f9)
310 || (c >= 0x531 && c <= 0x556)
311 || c == 0x559
312 || (c >= 0x561 && c <= 0x586)
313 || (c >= 0x5d0 && c <= 0x5ea)
314 || (c >= 0x5f0 && c <= 0x5f2)
315 || (c >= 0x621 && c <= 0x63a)
316 || (c >= 0x641 && c <= 0x64a)
317 || (c >= 0x671 && c <= 0x6b7)
318 || (c >= 0x6ba && c <= 0x6be)
319 || (c >= 0x6c0 && c <= 0x6ce)
320 || (c >= 0x6d0 && c <= 0x6d3)
321 || c == 0x6d5
322 || (c >= 0x6e5 && c <= 0x6e6)
323 || (c >= 0x905 && c <= 0x939)
324 || c == 0x93d
325 || (c >= 0x958 && c <= 0x961)
326 || (c >= 0x985 && c <= 0x98c)
327 || (c >= 0x98f && c <= 0x990)
328 || (c >= 0x993 && c <= 0x9a8)
329 || (c >= 0x9aa && c <= 0x9b0)
330 || c == 0x9b2
331 || (c >= 0x9b6 && c <= 0x9b9)
332 || (c >= 0x9dc && c <= 0x9dd)
333 || (c >= 0x9df && c <= 0x9e1)
334 || (c >= 0x9f0 && c <= 0x9f1)
335 || (c >= 0xa05 && c <= 0xa0a)
336 || (c >= 0xa0f && c <= 0xa10)
337 || (c >= 0xa13 && c <= 0xa28)
338 || (c >= 0xa2a && c <= 0xa30)
339 || (c >= 0xa32 && c <= 0xa33)
340 || (c >= 0xa35 && c <= 0xa36)
341 || (c >= 0xa38 && c <= 0xa39)
342 || (c >= 0xa59 && c <= 0xa5c)
343 || c == 0xa5e
344 || (c >= 0xa72 && c <= 0xa74)
345 || (c >= 0xa85 && c <= 0xa8b)
346 || c == 0xa8d
347 || (c >= 0xa8f && c <= 0xa91)
348 || (c >= 0xa93 && c <= 0xaa8)
349 || (c >= 0xaaa && c <= 0xab0)
350 || (c >= 0xab2 && c <= 0xab3)
351 || (c >= 0xab5 && c <= 0xab9)
352 || c == 0xabd
353 || c == 0xae0
354 || (c >= 0xb05 && c <= 0xb0c)
355 || (c >= 0xb0f && c <= 0xb10)
356 || (c >= 0xb13 && c <= 0xb28)
357 || (c >= 0xb2a && c <= 0xb30)
358 || (c >= 0xb32 && c <= 0xb33)
359 || (c >= 0xb36 && c <= 0xb39)
360 || c == 0xb3d
361 || (c >= 0xb5c && c <= 0xb5d)
362 || (c >= 0xb5f && c <= 0xb61)
363 || (c >= 0xb85 && c <= 0xb8a)
364 || (c >= 0xb8e && c <= 0xb90)
365 || (c >= 0xb92 && c <= 0xb95)
366 || (c >= 0xb99 && c <= 0xb9a)
367 || c == 0xb9c
368 || (c >= 0xb9e && c <= 0xb9f)
369 || (c >= 0xba3 && c <= 0xba4)
370 || (c >= 0xba8 && c <= 0xbaa)
371 || (c >= 0xbae && c <= 0xbb5)
372 || (c >= 0xbb7 && c <= 0xbb9)
373 || (c >= 0xc05 && c <= 0xc0c)
374 || (c >= 0xc0e && c <= 0xc10)
375 || (c >= 0xc12 && c <= 0xc28)
376 || (c >= 0xc2a && c <= 0xc33)
377 || (c >= 0xc35 && c <= 0xc39)
378 || (c >= 0xc60 && c <= 0xc61)
379 || (c >= 0xc85 && c <= 0xc8c)
380 || (c >= 0xc8e && c <= 0xc90)
381 || (c >= 0xc92 && c <= 0xca8)
382 || (c >= 0xcaa && c <= 0xcb3)
383 || (c >= 0xcb5 && c <= 0xcb9)
384 || c == 0xcde
385 || (c >= 0xce0 && c <= 0xce1)
386 || (c >= 0xd05 && c <= 0xd0c)
387 || (c >= 0xd0e && c <= 0xd10)
388 || (c >= 0xd12 && c <= 0xd28)
389 || (c >= 0xd2a && c <= 0xd39)
390 || (c >= 0xd60 && c <= 0xd61)
391 || (c >= 0xe01 && c <= 0xe2e)
392 || c == 0xe30
393 || (c >= 0xe32 && c <= 0xe33)
394 || (c >= 0xe40 && c <= 0xe45)
395 || (c >= 0xe81 && c <= 0xe82)
396 || c == 0xe84
397 || (c >= 0xe87 && c <= 0xe88)
398 || c == 0xe8a
399 || c == 0xe8d
400 || (c >= 0xe94 && c <= 0xe97)
401 || (c >= 0xe99 && c <= 0xe9f)
402 || (c >= 0xea1 && c <= 0xea3)
403 || c == 0xea5
404 || c == 0xea7
405 || (c >= 0xeaa && c <= 0xeab)
406 || (c >= 0xead && c <= 0xeae)
407 || c == 0xeb0
408 || (c >= 0xeb2 && c <= 0xeb3)
409 || c == 0xebd
410 || (c >= 0xec0 && c <= 0xec4)
411 || (c >= 0xf40 && c <= 0xf47)
412 || (c >= 0xf49 && c <= 0xf69)
413 || (c >= 0x10a0 && c <= 0x10c5)
414 || (c >= 0x10d0 && c <= 0x10f6)
415 || c == 0x1100
416 || (c >= 0x1102 && c <= 0x1103)
417 || (c >= 0x1105 && c <= 0x1107)
418 || c == 0x1109
419 || (c >= 0x110b && c <= 0x110c)
420 || (c >= 0x110e && c <= 0x1112)
421 || c == 0x113c
422 || c == 0x113e
423 || c == 0x1140
424 || c == 0x114c
425 || c == 0x114e
426 || c == 0x1150
427 || (c >= 0x1154 && c <= 0x1155)
428 || c == 0x1159
429 || (c >= 0x115f && c <= 0x1161)
430 || c == 0x1163
431 || c == 0x1165
432 || c == 0x1167
433 || c == 0x1169
434 || (c >= 0x116d && c <= 0x116e)
435 || (c >= 0x1172 && c <= 0x1173)
436 || c == 0x1175
437 || c == 0x119e
438 || c == 0x11a8
439 || c == 0x11ab
440 || (c >= 0x11ae && c <= 0x11af)
441 || (c >= 0x11b7 && c <= 0x11b8)
442 || c == 0x11ba
443 || (c >= 0x11bc && c <= 0x11c2)
444 || c == 0x11eb
445 || c == 0x11f0
446 || c == 0x11f9
447 || (c >= 0x1e00 && c <= 0x1e9b)
448 || (c >= 0x1ea0 && c <= 0x1ef9)
449 || (c >= 0x1f00 && c <= 0x1f15)
450 || (c >= 0x1f18 && c <= 0x1f1d)
451 || (c >= 0x1f20 && c <= 0x1f45)
452 || (c >= 0x1f48 && c <= 0x1f4d)
453 || (c >= 0x1f50 && c <= 0x1f57)
454 || c == 0x1f59
455 || c == 0x1f5b
456 || c == 0x1f5d
457 || (c >= 0x1f5f && c <= 0x1f7d)
458 || (c >= 0x1f80 && c <= 0x1fb4)
459 || (c >= 0x1fb6 && c <= 0x1fbc)
460 || c == 0x1fbe
461 || (c >= 0x1fc2 && c <= 0x1fc4)
462 || (c >= 0x1fc6 && c <= 0x1fcc)
463 || (c >= 0x1fd0 && c <= 0x1fd3)
464 || (c >= 0x1fd6 && c <= 0x1fdb)
465 || (c >= 0x1fe0 && c <= 0x1fec)
466 || (c >= 0x1ff2 && c <= 0x1ff4)
467 || (c >= 0x1ff6 && c <= 0x1ffc)
468 || c == 0x2126
469 || (c >= 0x212a && c <= 0x212b)
470 || c == 0x212e
471 || (c >= 0x2180 && c <= 0x2182)
472 || (c >= 0x3041 && c <= 0x3094)
473 || (c >= 0x30a1 && c <= 0x30fa)
474 || (c >= 0x3105 && c <= 0x312c)
475 || (c >= 0xac00 && c <= 0xd7a3)
476 || (c >= 0x4e00 && c <= 0x9fa5)
477 || c == 0x3007
478 || (c >= 0x3021 && c <= 0x3029)
479 || (c >= 0x4e00 && c <= 0x9fa5)
480 || c == 0x3007 || (c >= 0x3021 && c <= 0x3029));
481 }
482
483 /**
484 * Is the given char valid in xml name?
485 * @param c char
486 * @return <code>true</code> if the char is a valid xml name char
487 */
488 static boolean isXMLNamechar(char c)
489 {
490 return (isXMLLetter(c)
491 || c == '.'
492 || c == '_'
493 || c == ':'
494 || c == '-'
495 || (c >= 0x300 && c <= 0x345)
496 || (c >= 0x360 && c <= 0x361)
497 || (c >= 0x483 && c <= 0x486)
498 || (c >= 0x591 && c <= 0x5a1)
499 || (c >= 0x5a3 && c <= 0x5b9)
500 || (c >= 0x5bb && c <= 0x5bd)
501 || c == 0x5bf
502 || (c >= 0x5c1 && c <= 0x5c2)
503 || c == 0x5c4
504 || (c >= 0x64b && c <= 0x652)
505 || c == 0x670
506 || (c >= 0x6d6 && c <= 0x6dc)
507 || (c >= 0x6dd && c <= 0x6df)
508 || (c >= 0x6e0 && c <= 0x6e4)
509 || (c >= 0x6e7 && c <= 0x6e8)
510 || (c >= 0x6ea && c <= 0x6ed)
511 || (c >= 0x901 && c <= 0x903)
512 || c == 0x93c
513 || (c >= 0x93e && c <= 0x94c)
514 || c == 0x94d
515 || (c >= 0x951 && c <= 0x954)
516 || (c >= 0x962 && c <= 0x963)
517 || (c >= 0x981 && c <= 0x983)
518 || c == 0x9bc
519 || c == 0x9be
520 || c == 0x9bf
521 || (c >= 0x9c0 && c <= 0x9c4)
522 || (c >= 0x9c7 && c <= 0x9c8)
523 || (c >= 0x9cb && c <= 0x9cd)
524 || c == 0x9d7
525 || (c >= 0x9e2 && c <= 0x9e3)
526 || c == 0xa02
527 || c == 0xa3c
528 || c == 0xa3e
529 || c == 0xa3f
530 || (c >= 0xa40 && c <= 0xa42)
531 || (c >= 0xa47 && c <= 0xa48)
532 || (c >= 0xa4b && c <= 0xa4d)
533 || (c >= 0xa70 && c <= 0xa71)
534 || (c >= 0xa81 && c <= 0xa83)
535 || c == 0xabc
536 || (c >= 0xabe && c <= 0xac5)
537 || (c >= 0xac7 && c <= 0xac9)
538 || (c >= 0xacb && c <= 0xacd)
539 || (c >= 0xb01 && c <= 0xb03)
540 || c == 0xb3c
541 || (c >= 0xb3e && c <= 0xb43)
542 || (c >= 0xb47 && c <= 0xb48)
543 || (c >= 0xb4b && c <= 0xb4d)
544 || (c >= 0xb56 && c <= 0xb57)
545 || (c >= 0xb82 && c <= 0xb83)
546 || (c >= 0xbbe && c <= 0xbc2)
547 || (c >= 0xbc6 && c <= 0xbc8)
548 || (c >= 0xbca && c <= 0xbcd)
549 || c == 0xbd7
550 || (c >= 0xc01 && c <= 0xc03)
551 || (c >= 0xc3e && c <= 0xc44)
552 || (c >= 0xc46 && c <= 0xc48)
553 || (c >= 0xc4a && c <= 0xc4d)
554 || (c >= 0xc55 && c <= 0xc56)
555 || (c >= 0xc82 && c <= 0xc83)
556 || (c >= 0xcbe && c <= 0xcc4)
557 || (c >= 0xcc6 && c <= 0xcc8)
558 || (c >= 0xcca && c <= 0xccd)
559 || (c >= 0xcd5 && c <= 0xcd6)
560 || (c >= 0xd02 && c <= 0xd03)
561 || (c >= 0xd3e && c <= 0xd43)
562 || (c >= 0xd46 && c <= 0xd48)
563 || (c >= 0xd4a && c <= 0xd4d)
564 || c == 0xd57
565 || c == 0xe31
566 || (c >= 0xe34 && c <= 0xe3a)
567 || (c >= 0xe47 && c <= 0xe4e)
568 || c == 0xeb1
569 || (c >= 0xeb4 && c <= 0xeb9)
570 || (c >= 0xebb && c <= 0xebc)
571 || (c >= 0xec8 && c <= 0xecd)
572 || (c >= 0xf18 && c <= 0xf19)
573 || c == 0xf35
574 || c == 0xf37
575 || c == 0xf39
576 || c == 0xf3e
577 || c == 0xf3f
578 || (c >= 0xf71 && c <= 0xf84)
579 || (c >= 0xf86 && c <= 0xf8b)
580 || (c >= 0xf90 && c <= 0xf95)
581 || c == 0xf97
582 || (c >= 0xf99 && c <= 0xfad)
583 || (c >= 0xfb1 && c <= 0xfb7)
584 || c == 0xfb9
585 || (c >= 0x20d0 && c <= 0x20dc)
586 || c == 0x20e1
587 || (c >= 0x302a && c <= 0x302f)
588 || c == 0x3099
589 || c == 0x309a
590 || (c >= 0x30 && c <= 0x39)
591 || (c >= 0x660 && c <= 0x669)
592 || (c >= 0x6f0 && c <= 0x6f9)
593 || (c >= 0x966 && c <= 0x96f)
594 || (c >= 0x9e6 && c <= 0x9ef)
595 || (c >= 0xa66 && c <= 0xa6f)
596 || (c >= 0xae6 && c <= 0xaef)
597 || (c >= 0xb66 && c <= 0xb6f)
598 || (c >= 0xbe7 && c <= 0xbef)
599 || (c >= 0xc66 && c <= 0xc6f)
600 || (c >= 0xce6 && c <= 0xcef)
601 || (c >= 0xd66 && c <= 0xd6f)
602 || (c >= 0xe50 && c <= 0xe59)
603 || (c >= 0xed0 && c <= 0xed9)
604 || (c >= 0xf20 && c <= 0xf29)
605 || c == 0xb7
606 || c == 0x2d0
607 || c == 0x2d1
608 || c == 0x387
609 || c == 0x640
610 || c == 0xe46
611 || c == 0xec6
612 || c == 0x3005
613 || (c >= 0x3031 && c <= 0x3035)
614 || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe));
615 }
616
617 /**
618 * Is the given character a single or double quote?
619 * @param c char
620 * @return <code>true</code> if c is " or '
621 */
622 static boolean isQuote(int c)
623 {
624 return (c == '\'' || c == '\"');
625 }
626
627 /**
628 * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding
629 * throws declarations in lots of methods.
630 * @param str String
631 * @return utf8 bytes
632 * @see String#getBytes()
633 */
634 public static byte[] getBytes(String str)
635 {
636 try
637 {
638 return str.getBytes("UTF8");
639 }
640 catch (java.io.UnsupportedEncodingException e)
641 {
642 throw new Error("String to UTF-8 conversion failed: " + e.getMessage());
643 }
644 }
645
646 /**
647 * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding
648 * throws declarations in lots of methods.
649 * @param bytes byte array
650 * @param offset starting offset in byte array
651 * @param length length in byte array starting from offset
652 * @return same as <code>new String(bytes, offset, length, "UTF8")</code>
653 */
654 public static String getString(byte[] bytes, int offset, int length)
655 {
656 try
657 {
658 return new String(bytes, offset, length, "UTF8");
659 }
660 catch (java.io.UnsupportedEncodingException e)
661 {
662 throw new Error("UTF-8 to string conversion failed: " + e.getMessage());
663 }
664 }
665
666 /**
667 * Return the last char in string. This is useful when trailing quotemark is missing on an attribute
668 * @param str String
669 * @return last char in String
670 */
671 public static int lastChar(String str)
672 {
673 if (str != null && str.length() > 0)
674 {
675 return str.charAt(str.length() - 1);
676 }
677
678 return 0;
679 }
680
681 /**
682 * Determines if the specified character is whitespace.
683 * @param c char
684 * @return <code>true</code> if char is whitespace.
685 */
686 public static boolean isWhite(char c)
687 {
688 short m = map(c);
689 return TidyUtils.toBoolean(m & WHITE);
690 }
691
692 /**
693 * Is the given char a digit?
694 * @param c char
695 * @return <code>true</code> if the given char is a digit
696 */
697 public static boolean isDigit(char c)
698 {
699 short m;
700 m = map(c);
701 return TidyUtils.toBoolean(m & DIGIT);
702 }
703
704 /**
705 * Is the given char a letter?
706 * @param c char
707 * @return <code>true</code> if the given char is a letter
708 */
709 public static boolean isLetter(char c)
710 {
711 short m;
712 m = map(c);
713 return TidyUtils.toBoolean(m & LETTER);
714 }
715
716 /**
717 * Is the given char valid in name? (letter, digit or "-", ".", ":", "_")
718 * @param c char
719 * @return <code>true</code> if char is a name char.
720 */
721 public static boolean isNamechar(char c)
722 {
723 short map = map(c);
724
725 return TidyUtils.toBoolean(map & NAMECHAR);
726 }
727
728 /**
729 * Determines if the specified character is a lowercase character.
730 * @param c char
731 * @return <code>true</code> if char is lower case.
732 */
733 public static boolean isLower(char c)
734 {
735 short map = map(c);
736
737 return TidyUtils.toBoolean(map & LOWERCASE);
738 }
739
740 /**
741 * Determines if the specified character is a uppercase character.
742 * @param c char
743 * @return <code>true</code> if char is upper case.
744 */
745 public static boolean isUpper(char c)
746 {
747 short map = map(c);
748
749 return TidyUtils.toBoolean(map & UPPERCASE);
750 }
751
752 /**
753 * Maps the given character to its lowercase equivalent.
754 * @param c char
755 * @return lowercase char.
756 */
757 public static char toLower(char c)
758 {
759 short m = map(c);
760
761 if (TidyUtils.toBoolean(m & UPPERCASE))
762 {
763 c = (char) (c + 'a' - 'A');
764 }
765
766 return c;
767 }
768
769 /**
770 * Maps the given character to its uppercase equivalent.
771 * @param c char
772 * @return uppercase char.
773 */
774 public static char toUpper(char c)
775 {
776 short m = map(c);
777
778 if (TidyUtils.toBoolean(m & LOWERCASE))
779 {
780 c = (char) (c + 'A' - 'a');
781 }
782
783 return c;
784 }
785
786 /**
787 * Fold case of a char.
788 * @param c char
789 * @param tocaps convert to caps
790 * @param xmlTags use xml tags? If true no change will be performed
791 * @return folded char
792 * @todo check the use of xmlTags parameter
793 */
794 public static char foldCase(char c, boolean tocaps, boolean xmlTags)
795 {
796
797 if (!xmlTags)
798 {
799
800 if (tocaps)
801 {
802 if (isLower(c))
803 {
804 c = toUpper(c);
805 }
806 }
807 else
808 {
809
810 if (isUpper(c))
811 {
812 c = toLower(c);
813 }
814 }
815 }
816
817 return c;
818 }
819
820 /**
821 * Classify chars in String and put them in lexmap.
822 * @param str String
823 * @param code code associated to chars in the String
824 */
825 private static void mapStr(String str, short code)
826 {
827 int c;
828 for (int i = 0; i < str.length(); i++)
829 {
830 c = str.charAt(i);
831 lexmap[c] |= code;
832 }
833 }
834
835 /**
836 * Returns the constant which defines the classification of char in lexmap.
837 * @param c char
838 * @return char type
839 */
840 private static short map(char c)
841 {
842 return (c < 128 ? lexmap[c] : 0);
843 }
844
845 /**
846 * Is the given character encoding supported?
847 * @param name character encoding name
848 * @return <code>true</code> if encoding is supported, false otherwhise.
849 */
850 public static boolean isCharEncodingSupported(String name)
851 {
852 name = EncodingNameMapper.toJava(name);
853 if (name == null)
854 {
855 return false;
856 }
857
858 try
859 {
860 "".getBytes(name);
861 }
862 catch (java.io.UnsupportedEncodingException e)
863 {
864 return false;
865 }
866 return true;
867 }
868 }