Clover coverage report - Maven Clover report
Coverage timestamp: Tue Aug 1 2006 15:09:51 CEST
file stats: LOC: 868   Methods: 28
NCLOC: 569   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
TidyUtils.java 67.6% 76.9% 75% 74.5%
coverage coverage
 1    /*
 2    * Java HTML Tidy - JTidy
 3    * HTML parser and pretty printer
 4    *
 5    * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 6    * Institute of Technology, Institut National de Recherche en
 7    * Informatique et en Automatique, Keio University). All Rights
 8    * Reserved.
 9    *
 10    * Contributing Author(s):
 11    *
 12    * Dave Raggett <dsr@w3.org>
 13    * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
 14    * Gary L Peskin <garyp@firstech.com> (Java development)
 15    * Sami Lempinen <sami@lempinen.net> (release management)
 16    * Fabrizio Giustina <fgiust at users.sourceforge.net>
 17    *
 18    * The contributing author(s) would like to thank all those who
 19    * helped with testing, bug fixes, and patience. This wouldn't
 20    * have been possible without all of you.
 21    *
 22    * COPYRIGHT NOTICE:
 23    *
 24    * This software and documentation is provided "as is," and
 25    * the copyright holders and contributing author(s) make no
 26    * representations or warranties, express or implied, including
 27    * but not limited to, warranties of merchantability or fitness
 28    * for any particular purpose or that the use of the software or
 29    * documentation will not infringe any third party patents,
 30    * copyrights, trademarks or other rights.
 31    *
 32    * The copyright holders and contributing author(s) will not be
 33    * liable for any direct, indirect, special or consequential damages
 34    * arising out of any use of the software or documentation, even if
 35    * advised of the possibility of such damage.
 36    *
 37    * Permission is hereby granted to use, copy, modify, and distribute
 38    * this source code, or portions hereof, documentation and executables,
 39    * for any purpose, without fee, subject to the following restrictions:
 40    *
 41    * 1. The origin of this source code must not be misrepresented.
 42    * 2. Altered versions must be plainly marked as such and must
 43    * not be misrepresented as being the original source.
 44    * 3. This Copyright notice may not be removed or altered from any
 45    * source or altered source distribution.
 46    *
 47    * The copyright holders and contributing author(s) specifically
 48    * permit, without fee, and encourage the use of this source code
 49    * as a component for supporting the Hypertext Markup Language in
 50    * commercial products. If you use this source code in a product,
 51    * acknowledgment is not required but would be appreciated.
 52    *
 53    */
 54   
 55    package org.w3c.tidy;
 56   
 57    /**
 58    * Utility class with handy methods, mainly for String handling or for reproducing C behaviours.
 59    * @author Fabrizio Giustina
 60    * @version $Revision $ ($Author $)
 61    */
 62    public final class TidyUtils
 63    {
 64   
 65    /**
 66    * char type: digit.
 67    */
 68    private static final short DIGIT = 1;
 69   
 70    /**
 71    * char type: letter.
 72    */
 73    private static final short LETTER = 2;
 74   
 75    /**
 76    * char type: namechar.
 77    */
 78    private static final short NAMECHAR = 4;
 79   
 80    /**
 81    * char type: whitespace.
 82    */
 83    private static final short WHITE = 8;
 84   
 85    /**
 86    * char type: newline.
 87    */
 88    private static final short NEWLINE = 16;
 89   
 90    /**
 91    * char type: lowercase.
 92    */
 93    private static final short LOWERCASE = 32;
 94   
 95    /**
 96    * char type: uppercase.
 97    */
 98    private static final short UPPERCASE = 64;
 99   
 100    /**
 101    * used to classify chars for lexical purposes.
 102    */
 103    private static short[] lexmap = new short[128];
 104   
 105    static
 106    {
 107  1 mapStr("\r\n\f", (short) (NEWLINE | WHITE));
 108  1 mapStr(" \t", WHITE);
 109  1 mapStr("-.:_", NAMECHAR);
 110  1 mapStr("0123456789", (short) (DIGIT | NAMECHAR));
 111  1 mapStr("abcdefghijklmnopqrstuvwxyz", (short) (LOWERCASE | LETTER | NAMECHAR));
 112  1 mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short) (UPPERCASE | LETTER | NAMECHAR));
 113    }
 114   
 115    /**
 116    * utility class, don't instantiate.
 117    */
 118  0 private TidyUtils()
 119    {
 120    // unused
 121    }
 122   
 123    /**
 124    * Converts a int to a boolean.
 125    * @param value int value
 126    * @return <code>true</code> if value is != 0
 127    */
 128  831238 static boolean toBoolean(int value)
 129    {
 130  831238 return value != 0;
 131    }
 132   
 133    /**
 134    * convert an int to unsigned (& 0xFF).
 135    * @param c signed int
 136    * @return unsigned int
 137    */
 138  2050 static int toUnsigned(int c)
 139    {
 140  2050 return c & 0xFF;
 141    }
 142   
 143    /**
 144    * check if the first String contains the second one.
 145    * @param s1 full String
 146    * @param len1 maximum position in String
 147    * @param s2 String to search for
 148    * @return true if s1 contains s2 in the range 0-len1
 149    */
 150  0 static boolean wsubstrn(String s1, int len1, String s2)
 151    {
 152  0 int searchIndex = s1.indexOf(s2);
 153  0 return searchIndex > -1 && searchIndex <= len1;
 154    }
 155   
 156    /**
 157    * check if the first String contains the second one (ignore case).
 158    * @param s1 full String
 159    * @param len1 maximum position in String
 160    * @param s2 String to search for
 161    * @return true if s1 contains s2 in the range 0-len1
 162    */
 163  0 static boolean wsubstrncase(String s1, int len1, String s2)
 164    {
 165  0 return wsubstrn(s1.toLowerCase(), len1, s2.toLowerCase());
 166    }
 167   
 168    /**
 169    * return offset of cc from beginning of s1, -1 if not found.
 170    * @param s1 String
 171    * @param len1 maximum offset (values > than lenl are ignored and returned as -1)
 172    * @param cc character to search for
 173    * @return index of cc in s1
 174    */
 175  0 static int wstrnchr(String s1, int len1, char cc)
 176    {
 177  0 int indexOf = s1.indexOf(cc);
 178  0 if (indexOf < len1)
 179    {
 180  0 return indexOf;
 181    }
 182   
 183  0 return -1;
 184    }
 185   
 186    /**
 187    * Same as wsubstrn, but without a specified length.
 188    * @param s1 full String
 189    * @param s2 String to search for
 190    * @return <code>true</code> if s2 is found in s2 (case insensitive search)
 191    */
 192  0 static boolean wsubstr(String s1, String s2)
 193    {
 194  0 int i;
 195  0 int len1 = s1.length();
 196  0 int len2 = s2.length();
 197   
 198  0 for (i = 0; i <= len1 - len2; ++i)
 199    {
 200  0 if (s2.equalsIgnoreCase(s1.substring(i)))
 201    {
 202  0 return true;
 203    }
 204    }
 205   
 206  0 return false;
 207    }
 208   
 209    /**
 210    * Is the character a hex digit?
 211    * @param c char
 212    * @return <code>true</code> if he given character is a hex digit
 213    */
 214  0 static boolean isxdigit(char c)
 215    {
 216  0 return Character.isDigit(c) || (Character.toLowerCase(c) >= 'a' && Character.toLowerCase(c) <= 'f');
 217    }
 218   
 219    /**
 220    * Check if the string valueToCheck is contained in validValues array (case insesitie comparison).
 221    * @param validValues array of valid values
 222    * @param valueToCheck value to search for
 223    * @return <code>true</code> if valueToCheck is found in validValues
 224    */
 225  5457 static boolean isInValuesIgnoreCase(String[] validValues, String valueToCheck)
 226    {
 227  5457 int len = validValues.length;
 228  5457 for (int j = 0; j < len; j++)
 229    {
 230  20421 if (validValues[j].equalsIgnoreCase(valueToCheck))
 231    {
 232  582 return true;
 233    }
 234    }
 235  4875 return false;
 236    }
 237   
 238    /**
 239    * Return true if substring s is in p and isn't all in upper case. This is used to check the case of SYSTEM, PUBLIC,
 240    * DTD and EN.
 241    * @param s substring
 242    * @param p full string
 243    * @param len how many chars to check in p
 244    * @return true if substring s is in p and isn't all in upper case
 245    */
 246  590 public static boolean findBadSubString(String s, String p, int len)
 247    {
 248  590 int n = s.length();
 249  590 int i = 0;
 250  590 String ps;
 251   
 252  590 while (n < len)
 253    {
 254  18796 ps = p.substring(i, i + n);
 255  18796 if (s.equalsIgnoreCase(ps))
 256    {
 257  470 return (!ps.equals(s.substring(0, n)));
 258    }
 259   
 260  18326 ++i;
 261  18326 --len;
 262    }
 263   
 264  120 return false;
 265    }
 266   
 267    /**
 268    * Is the given char a valid xml letter?
 269    * @param c char
 270    * @return <code>true</code> if the char is a valid xml letter
 271    */
 272  4 static boolean isXMLLetter(char c)
 273    {
 274  4 return ((c >= 0x41 && c <= 0x5a)
 275    || (c >= 0x61 && c <= 0x7a)
 276    || (c >= 0xc0 && c <= 0xd6)
 277    || (c >= 0xd8 && c <= 0xf6)
 278    || (c >= 0xf8 && c <= 0xff)
 279    || (c >= 0x100 && c <= 0x131)
 280    || (c >= 0x134 && c <= 0x13e)
 281    || (c >= 0x141 && c <= 0x148)
 282    || (c >= 0x14a && c <= 0x17e)
 283    || (c >= 0x180 && c <= 0x1c3)
 284    || (c >= 0x1cd && c <= 0x1f0)
 285    || (c >= 0x1f4 && c <= 0x1f5)
 286    || (c >= 0x1fa && c <= 0x217)
 287    || (c >= 0x250 && c <= 0x2a8)
 288    || (c >= 0x2bb && c <= 0x2c1)
 289    || c == 0x386
 290    || (c >= 0x388 && c <= 0x38a)
 291    || c == 0x38c
 292    || (c >= 0x38e && c <= 0x3a1)
 293    || (c >= 0x3a3 && c <= 0x3ce)
 294    || (c >= 0x3d0 && c <= 0x3d6)
 295    || c == 0x3da
 296    || c == 0x3dc
 297    || c == 0x3de
 298    || c == 0x3e0
 299    || (c >= 0x3e2 && c <= 0x3f3)
 300    || (c >= 0x401 && c <= 0x40c)
 301    || (c >= 0x40e && c <= 0x44f)
 302    || (c >= 0x451 && c <= 0x45c)
 303    || (c >= 0x45e && c <= 0x481)
 304    || (c >= 0x490 && c <= 0x4c4)
 305    || (c >= 0x4c7 && c <= 0x4c8)
 306    || (c >= 0x4cb && c <= 0x4cc)
 307    || (c >= 0x4d0 && c <= 0x4eb)
 308    || (c >= 0x4ee && c <= 0x4f5)
 309    || (c >= 0x4f8 && c <= 0x4f9)
 310    || (c >= 0x531 && c <= 0x556)
 311    || c == 0x559
 312    || (c >= 0x561 && c <= 0x586)
 313    || (c >= 0x5d0 && c <= 0x5ea)
 314    || (c >= 0x5f0 && c <= 0x5f2)
 315    || (c >= 0x621 && c <= 0x63a)
 316    || (c >= 0x641 && c <= 0x64a)
 317    || (c >= 0x671 && c <= 0x6b7)
 318    || (c >= 0x6ba && c <= 0x6be)
 319    || (c >= 0x6c0 && c <= 0x6ce)
 320    || (c >= 0x6d0 && c <= 0x6d3)
 321    || c == 0x6d5
 322    || (c >= 0x6e5 && c <= 0x6e6)
 323    || (c >= 0x905 && c <= 0x939)
 324    || c == 0x93d
 325    || (c >= 0x958 && c <= 0x961)
 326    || (c >= 0x985 && c <= 0x98c)
 327    || (c >= 0x98f && c <= 0x990)
 328    || (c >= 0x993 && c <= 0x9a8)
 329    || (c >= 0x9aa && c <= 0x9b0)
 330    || c == 0x9b2
 331    || (c >= 0x9b6 && c <= 0x9b9)
 332    || (c >= 0x9dc && c <= 0x9dd)
 333    || (c >= 0x9df && c <= 0x9e1)
 334    || (c >= 0x9f0 && c <= 0x9f1)
 335    || (c >= 0xa05 && c <= 0xa0a)
 336    || (c >= 0xa0f && c <= 0xa10)
 337    || (c >= 0xa13 && c <= 0xa28)
 338    || (c >= 0xa2a && c <= 0xa30)
 339    || (c >= 0xa32 && c <= 0xa33)
 340    || (c >= 0xa35 && c <= 0xa36)
 341    || (c >= 0xa38 && c <= 0xa39)
 342    || (c >= 0xa59 && c <= 0xa5c)
 343    || c == 0xa5e
 344    || (c >= 0xa72 && c <= 0xa74)
 345    || (c >= 0xa85 && c <= 0xa8b)
 346    || c == 0xa8d
 347    || (c >= 0xa8f && c <= 0xa91)
 348    || (c >= 0xa93 && c <= 0xaa8)
 349    || (c >= 0xaaa && c <= 0xab0)
 350    || (c >= 0xab2 && c <= 0xab3)
 351    || (c >= 0xab5 && c <= 0xab9)
 352    || c == 0xabd
 353    || c == 0xae0
 354    || (c >= 0xb05 && c <= 0xb0c)
 355    || (c >= 0xb0f && c <= 0xb10)
 356    || (c >= 0xb13 && c <= 0xb28)
 357    || (c >= 0xb2a && c <= 0xb30)
 358    || (c >= 0xb32 && c <= 0xb33)
 359    || (c >= 0xb36 && c <= 0xb39)
 360    || c == 0xb3d
 361    || (c >= 0xb5c && c <= 0xb5d)
 362    || (c >= 0xb5f && c <= 0xb61)
 363    || (c >= 0xb85 && c <= 0xb8a)
 364    || (c >= 0xb8e && c <= 0xb90)
 365    || (c >= 0xb92 && c <= 0xb95)
 366    || (c >= 0xb99 && c <= 0xb9a)
 367    || c == 0xb9c
 368    || (c >= 0xb9e && c <= 0xb9f)
 369    || (c >= 0xba3 && c <= 0xba4)
 370    || (c >= 0xba8 && c <= 0xbaa)
 371    || (c >= 0xbae && c <= 0xbb5)
 372    || (c >= 0xbb7 && c <= 0xbb9)
 373    || (c >= 0xc05 && c <= 0xc0c)
 374    || (c >= 0xc0e && c <= 0xc10)
 375    || (c >= 0xc12 && c <= 0xc28)
 376    || (c >= 0xc2a && c <= 0xc33)
 377    || (c >= 0xc35 && c <= 0xc39)
 378    || (c >= 0xc60 && c <= 0xc61)
 379    || (c >= 0xc85 && c <= 0xc8c)
 380    || (c >= 0xc8e && c <= 0xc90)
 381    || (c >= 0xc92 && c <= 0xca8)
 382    || (c >= 0xcaa && c <= 0xcb3)
 383    || (c >= 0xcb5 && c <= 0xcb9)
 384    || c == 0xcde
 385    || (c >= 0xce0 && c <= 0xce1)
 386    || (c >= 0xd05 && c <= 0xd0c)
 387    || (c >= 0xd0e && c <= 0xd10)
 388    || (c >= 0xd12 && c <= 0xd28)
 389    || (c >= 0xd2a && c <= 0xd39)
 390    || (c >= 0xd60 && c <= 0xd61)
 391    || (c >= 0xe01 && c <= 0xe2e)
 392    || c == 0xe30
 393    || (c >= 0xe32 && c <= 0xe33)
 394    || (c >= 0xe40 && c <= 0xe45)
 395    || (c >= 0xe81 && c <= 0xe82)
 396    || c == 0xe84
 397    || (c >= 0xe87 && c <= 0xe88)
 398    || c == 0xe8a
 399    || c == 0xe8d
 400    || (c >= 0xe94 && c <= 0xe97)
 401    || (c >= 0xe99 && c <= 0xe9f)
 402    || (c >= 0xea1 && c <= 0xea3)
 403    || c == 0xea5
 404    || c == 0xea7
 405    || (c >= 0xeaa && c <= 0xeab)
 406    || (c >= 0xead && c <= 0xeae)
 407    || c == 0xeb0
 408    || (c >= 0xeb2 && c <= 0xeb3)
 409    || c == 0xebd
 410    || (c >= 0xec0 && c <= 0xec4)
 411    || (c >= 0xf40 && c <= 0xf47)
 412    || (c >= 0xf49 && c <= 0xf69)
 413    || (c >= 0x10a0 && c <= 0x10c5)
 414    || (c >= 0x10d0 && c <= 0x10f6)
 415    || c == 0x1100
 416    || (c >= 0x1102 && c <= 0x1103)
 417    || (c >= 0x1105 && c <= 0x1107)
 418    || c == 0x1109
 419    || (c >= 0x110b && c <= 0x110c)
 420    || (c >= 0x110e && c <= 0x1112)
 421    || c == 0x113c
 422    || c == 0x113e
 423    || c == 0x1140
 424    || c == 0x114c
 425    || c == 0x114e
 426    || c == 0x1150
 427    || (c >= 0x1154 && c <= 0x1155)
 428    || c == 0x1159
 429    || (c >= 0x115f && c <= 0x1161)
 430    || c == 0x1163
 431    || c == 0x1165
 432    || c == 0x1167
 433    || c == 0x1169
 434    || (c >= 0x116d && c <= 0x116e)
 435    || (c >= 0x1172 && c <= 0x1173)
 436    || c == 0x1175
 437    || c == 0x119e
 438    || c == 0x11a8
 439    || c == 0x11ab
 440    || (c >= 0x11ae && c <= 0x11af)
 441    || (c >= 0x11b7 && c <= 0x11b8)
 442    || c == 0x11ba
 443    || (c >= 0x11bc && c <= 0x11c2)
 444    || c == 0x11eb
 445    || c == 0x11f0
 446    || c == 0x11f9
 447    || (c >= 0x1e00 && c <= 0x1e9b)
 448    || (c >= 0x1ea0 && c <= 0x1ef9)
 449    || (c >= 0x1f00 && c <= 0x1f15)
 450    || (c >= 0x1f18 && c <= 0x1f1d)
 451    || (c >= 0x1f20 && c <= 0x1f45)
 452    || (c >= 0x1f48 && c <= 0x1f4d)
 453    || (c >= 0x1f50 && c <= 0x1f57)
 454    || c == 0x1f59
 455    || c == 0x1f5b
 456    || c == 0x1f5d
 457    || (c >= 0x1f5f && c <= 0x1f7d)
 458    || (c >= 0x1f80 && c <= 0x1fb4)
 459    || (c >= 0x1fb6 && c <= 0x1fbc)
 460    || c == 0x1fbe
 461    || (c >= 0x1fc2 && c <= 0x1fc4)
 462    || (c >= 0x1fc6 && c <= 0x1fcc)
 463    || (c >= 0x1fd0 && c <= 0x1fd3)
 464    || (c >= 0x1fd6 && c <= 0x1fdb)
 465    || (c >= 0x1fe0 && c <= 0x1fec)
 466    || (c >= 0x1ff2 && c <= 0x1ff4)
 467    || (c >= 0x1ff6 && c <= 0x1ffc)
 468    || c == 0x2126
 469    || (c >= 0x212a && c <= 0x212b)
 470    || c == 0x212e
 471    || (c >= 0x2180 && c <= 0x2182)
 472    || (c >= 0x3041 && c <= 0x3094)
 473    || (c >= 0x30a1 && c <= 0x30fa)
 474    || (c >= 0x3105 && c <= 0x312c)
 475    || (c >= 0xac00 && c <= 0xd7a3)
 476    || (c >= 0x4e00 && c <= 0x9fa5)
 477    || c == 0x3007
 478    || (c >= 0x3021 && c <= 0x3029)
 479    || (c >= 0x4e00 && c <= 0x9fa5)
 480    || c == 0x3007 || (c >= 0x3021 && c <= 0x3029));
 481    }
 482   
 483    /**
 484    * Is the given char valid in xml name?
 485    * @param c char
 486    * @return <code>true</code> if the char is a valid xml name char
 487    */
 488  0 static boolean isXMLNamechar(char c)
 489    {
 490  0 return (isXMLLetter(c)
 491    || c == '.'
 492    || c == '_'
 493    || c == ':'
 494    || c == '-'
 495    || (c >= 0x300 && c <= 0x345)
 496    || (c >= 0x360 && c <= 0x361)
 497    || (c >= 0x483 && c <= 0x486)
 498    || (c >= 0x591 && c <= 0x5a1)
 499    || (c >= 0x5a3 && c <= 0x5b9)
 500    || (c >= 0x5bb && c <= 0x5bd)
 501    || c == 0x5bf
 502    || (c >= 0x5c1 && c <= 0x5c2)
 503    || c == 0x5c4
 504    || (c >= 0x64b && c <= 0x652)
 505    || c == 0x670
 506    || (c >= 0x6d6 && c <= 0x6dc)
 507    || (c >= 0x6dd && c <= 0x6df)
 508    || (c >= 0x6e0 && c <= 0x6e4)
 509    || (c >= 0x6e7 && c <= 0x6e8)
 510    || (c >= 0x6ea && c <= 0x6ed)
 511    || (c >= 0x901 && c <= 0x903)
 512    || c == 0x93c
 513    || (c >= 0x93e && c <= 0x94c)
 514    || c == 0x94d
 515    || (c >= 0x951 && c <= 0x954)
 516    || (c >= 0x962 && c <= 0x963)
 517    || (c >= 0x981 && c <= 0x983)
 518    || c == 0x9bc
 519    || c == 0x9be
 520    || c == 0x9bf
 521    || (c >= 0x9c0 && c <= 0x9c4)
 522    || (c >= 0x9c7 && c <= 0x9c8)
 523    || (c >= 0x9cb && c <= 0x9cd)
 524    || c == 0x9d7
 525    || (c >= 0x9e2 && c <= 0x9e3)
 526    || c == 0xa02
 527    || c == 0xa3c
 528    || c == 0xa3e
 529    || c == 0xa3f
 530    || (c >= 0xa40 && c <= 0xa42)
 531    || (c >= 0xa47 && c <= 0xa48)
 532    || (c >= 0xa4b && c <= 0xa4d)
 533    || (c >= 0xa70 && c <= 0xa71)
 534    || (c >= 0xa81 && c <= 0xa83)
 535    || c == 0xabc
 536    || (c >= 0xabe && c <= 0xac5)
 537    || (c >= 0xac7 && c <= 0xac9)
 538    || (c >= 0xacb && c <= 0xacd)
 539    || (c >= 0xb01 && c <= 0xb03)
 540    || c == 0xb3c
 541    || (c >= 0xb3e && c <= 0xb43)
 542    || (c >= 0xb47 && c <= 0xb48)
 543    || (c >= 0xb4b && c <= 0xb4d)
 544    || (c >= 0xb56 && c <= 0xb57)
 545    || (c >= 0xb82 && c <= 0xb83)
 546    || (c >= 0xbbe && c <= 0xbc2)
 547    || (c >= 0xbc6 && c <= 0xbc8)
 548    || (c >= 0xbca && c <= 0xbcd)
 549    || c == 0xbd7
 550    || (c >= 0xc01 && c <= 0xc03)
 551    || (c >= 0xc3e && c <= 0xc44)
 552    || (c >= 0xc46 && c <= 0xc48)
 553    || (c >= 0xc4a && c <= 0xc4d)
 554    || (c >= 0xc55 && c <= 0xc56)
 555    || (c >= 0xc82 && c <= 0xc83)
 556    || (c >= 0xcbe && c <= 0xcc4)
 557    || (c >= 0xcc6 && c <= 0xcc8)
 558    || (c >= 0xcca && c <= 0xccd)
 559    || (c >= 0xcd5 && c <= 0xcd6)
 560    || (c >= 0xd02 && c <= 0xd03)
 561    || (c >= 0xd3e && c <= 0xd43)
 562    || (c >= 0xd46 && c <= 0xd48)
 563    || (c >= 0xd4a && c <= 0xd4d)
 564    || c == 0xd57
 565    || c == 0xe31
 566    || (c >= 0xe34 && c <= 0xe3a)
 567    || (c >= 0xe47 && c <= 0xe4e)
 568    || c == 0xeb1
 569    || (c >= 0xeb4 && c <= 0xeb9)
 570    || (c >= 0xebb && c <= 0xebc)
 571    || (c >= 0xec8 && c <= 0xecd)
 572    || (c >= 0xf18 && c <= 0xf19)
 573    || c == 0xf35
 574    || c == 0xf37
 575    || c == 0xf39
 576    || c == 0xf3e
 577    || c == 0xf3f
 578    || (c >= 0xf71 && c <= 0xf84)
 579    || (c >= 0xf86 && c <= 0xf8b)
 580    || (c >= 0xf90 && c <= 0xf95)
 581    || c == 0xf97
 582    || (c >= 0xf99 && c <= 0xfad)
 583    || (c >= 0xfb1 && c <= 0xfb7)
 584    || c == 0xfb9
 585    || (c >= 0x20d0 && c <= 0x20dc)
 586    || c == 0x20e1
 587    || (c >= 0x302a && c <= 0x302f)
 588    || c == 0x3099
 589    || c == 0x309a
 590    || (c >= 0x30 && c <= 0x39)
 591    || (c >= 0x660 && c <= 0x669)
 592    || (c >= 0x6f0 && c <= 0x6f9)
 593    || (c >= 0x966 && c <= 0x96f)
 594    || (c >= 0x9e6 && c <= 0x9ef)
 595    || (c >= 0xa66 && c <= 0xa6f)
 596    || (c >= 0xae6 && c <= 0xaef)
 597    || (c >= 0xb66 && c <= 0xb6f)
 598    || (c >= 0xbe7 && c <= 0xbef)
 599    || (c >= 0xc66 && c <= 0xc6f)
 600    || (c >= 0xce6 && c <= 0xcef)
 601    || (c >= 0xd66 && c <= 0xd6f)
 602    || (c >= 0xe50 && c <= 0xe59)
 603    || (c >= 0xed0 && c <= 0xed9)
 604    || (c >= 0xf20 && c <= 0xf29)
 605    || c == 0xb7
 606    || c == 0x2d0
 607    || c == 0x2d1
 608    || c == 0x387
 609    || c == 0x640
 610    || c == 0xe46
 611    || c == 0xec6
 612    || c == 0x3005
 613    || (c >= 0x3031 && c <= 0x3035)
 614    || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe));
 615    }
 616   
 617    /**
 618    * Is the given character a single or double quote?
 619    * @param c char
 620    * @return <code>true</code> if c is " or '
 621    */
 622  13068 static boolean isQuote(int c)
 623    {
 624  13068 return (c == '\'' || c == '\"');
 625    }
 626   
 627    /**
 628    * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding
 629    * throws declarations in lots of methods.
 630    * @param str String
 631    * @return utf8 bytes
 632    * @see String#getBytes()
 633    */
 634  1529 public static byte[] getBytes(String str)
 635    {
 636  1529 try
 637    {
 638  1529 return str.getBytes("UTF8");
 639    }
 640    catch (java.io.UnsupportedEncodingException e)
 641    {
 642  0 throw new Error("String to UTF-8 conversion failed: " + e.getMessage());
 643    }
 644    }
 645   
 646    /**
 647    * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding
 648    * throws declarations in lots of methods.
 649    * @param bytes byte array
 650    * @param offset starting offset in byte array
 651    * @param length length in byte array starting from offset
 652    * @return same as <code>new String(bytes, offset, length, "UTF8")</code>
 653    */
 654  25625 public static String getString(byte[] bytes, int offset, int length)
 655    {
 656  25625 try
 657    {
 658  25625 return new String(bytes, offset, length, "UTF8");
 659    }
 660    catch (java.io.UnsupportedEncodingException e)
 661    {
 662  0 throw new Error("UTF-8 to string conversion failed: " + e.getMessage());
 663    }
 664    }
 665   
 666    /**
 667    * Return the last char in string. This is useful when trailing quotemark is missing on an attribute
 668    * @param str String
 669    * @return last char in String
 670    */
 671  26 public static int lastChar(String str)
 672    {
 673  26 if (str != null && str.length() > 0)
 674    {
 675  26 return str.charAt(str.length() - 1);
 676    }
 677   
 678  0 return 0;
 679    }
 680   
 681    /**
 682    * Determines if the specified character is whitespace.
 683    * @param c char
 684    * @return <code>true</code> if char is whitespace.
 685    */
 686  356367 public static boolean isWhite(char c)
 687    {
 688  356367 short m = map(c);
 689  356367 return TidyUtils.toBoolean(m & WHITE);
 690    }
 691   
 692    /**
 693    * Is the given char a digit?
 694    * @param c char
 695    * @return <code>true</code> if the given char is a digit
 696    */
 697  2862 public static boolean isDigit(char c)
 698    {
 699  2862 short m;
 700  2862 m = map(c);
 701  2862 return TidyUtils.toBoolean(m & DIGIT);
 702    }
 703   
 704    /**
 705    * Is the given char a letter?
 706    * @param c char
 707    * @return <code>true</code> if the given char is a letter
 708    */
 709  18713 public static boolean isLetter(char c)
 710    {
 711  18713 short m;
 712  18713 m = map(c);
 713  18713 return TidyUtils.toBoolean(m & LETTER);
 714    }
 715   
 716    /**
 717    * Is the given char valid in name? (letter, digit or "-", ".", ":", "_")
 718    * @param c char
 719    * @return <code>true</code> if char is a name char.
 720    */
 721  59443 public static boolean isNamechar(char c)
 722    {
 723  59443 short map = map(c);
 724   
 725  59443 return TidyUtils.toBoolean(map & NAMECHAR);
 726    }
 727   
 728    /**
 729    * Determines if the specified character is a lowercase character.
 730    * @param c char
 731    * @return <code>true</code> if char is lower case.
 732    */
 733  70 public static boolean isLower(char c)
 734    {
 735  70 short map = map(c);
 736   
 737  70 return TidyUtils.toBoolean(map & LOWERCASE);
 738    }
 739   
 740    /**
 741    * Determines if the specified character is a uppercase character.
 742    * @param c char
 743    * @return <code>true</code> if char is upper case.
 744    */
 745  83886 public static boolean isUpper(char c)
 746    {
 747  83886 short map = map(c);
 748   
 749  83886 return TidyUtils.toBoolean(map & UPPERCASE);
 750    }
 751   
 752    /**
 753    * Maps the given character to its lowercase equivalent.
 754    * @param c char
 755    * @return lowercase char.
 756    */
 757  7964 public static char toLower(char c)
 758    {
 759  7964 short m = map(c);
 760   
 761  7964 if (TidyUtils.toBoolean(m & UPPERCASE))
 762    {
 763  7964 c = (char) (c + 'a' - 'A');
 764    }
 765   
 766  7964 return c;
 767    }
 768   
 769    /**
 770    * Maps the given character to its uppercase equivalent.
 771    * @param c char
 772    * @return uppercase char.
 773    */
 774  70 public static char toUpper(char c)
 775    {
 776  70 short m = map(c);
 777   
 778  70 if (TidyUtils.toBoolean(m & LOWERCASE))
 779    {
 780  70 c = (char) (c + 'A' - 'a');
 781    }
 782   
 783  70 return c;
 784    }
 785   
 786    /**
 787    * Fold case of a char.
 788    * @param c char
 789    * @param tocaps convert to caps
 790    * @param xmlTags use xml tags? If true no change will be performed
 791    * @return folded char
 792    * @todo check the use of xmlTags parameter
 793    */
 794  27344 public static char foldCase(char c, boolean tocaps, boolean xmlTags)
 795    {
 796   
 797  27344 if (!xmlTags)
 798    {
 799   
 800  26864 if (tocaps)
 801    {
 802  70 if (isLower(c))
 803    {
 804  70 c = toUpper(c);
 805    }
 806    }
 807    else
 808    {
 809    // force to lower case
 810  26794 if (isUpper(c))
 811    {
 812  0 c = toLower(c);
 813    }
 814    }
 815    }
 816   
 817  27344 return c;
 818    }
 819   
 820    /**
 821    * Classify chars in String and put them in lexmap.
 822    * @param str String
 823    * @param code code associated to chars in the String
 824    */
 825  6 private static void mapStr(String str, short code)
 826    {
 827  6 int c;
 828  6 for (int i = 0; i < str.length(); i++)
 829    {
 830  71 c = str.charAt(i);
 831  71 lexmap[c] |= code;
 832    }
 833    }
 834   
 835    /**
 836    * Returns the constant which defines the classification of char in lexmap.
 837    * @param c char
 838    * @return char type
 839    */
 840  529375 private static short map(char c)
 841    {
 842  529375 return (c < 128 ? lexmap[c] : 0);
 843    }
 844   
 845    /**
 846    * Is the given character encoding supported?
 847    * @param name character encoding name
 848    * @return <code>true</code> if encoding is supported, false otherwhise.
 849    */
 850  37 public static boolean isCharEncodingSupported(String name)
 851    {
 852  37 name = EncodingNameMapper.toJava(name);
 853  37 if (name == null)
 854    {
 855  1 return false;
 856    }
 857   
 858  36 try
 859    {
 860  36 "".getBytes(name);
 861    }
 862    catch (java.io.UnsupportedEncodingException e)
 863    {
 864  0 return false;
 865    }
 866  36 return true;
 867    }
 868    }