Clover coverage report - Maven Clover report
Coverage timestamp: Tue Aug 1 2006 15:09:51 CEST
file stats: LOC: 356   Methods: 3
NCLOC: 240   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
EncodingNameMapper.java 60% 95% 66.7% 91.5%
coverage coverage
 1    /*
 2    * Java HTML Tidy - JTidy
 3    * HTML parser and pretty printer
 4    *
 5    * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 6    * Institute of Technology, Institut National de Recherche en
 7    * Informatique et en Automatique, Keio University). All Rights
 8    * Reserved.
 9    *
 10    * Contributing Author(s):
 11    *
 12    * Dave Raggett <dsr@w3.org>
 13    * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
 14    * Gary L Peskin <garyp@firstech.com> (Java development)
 15    * Sami Lempinen <sami@lempinen.net> (release management)
 16    * Fabrizio Giustina <fgiust at users.sourceforge.net>
 17    *
 18    * The contributing author(s) would like to thank all those who
 19    * helped with testing, bug fixes, and patience. This wouldn't
 20    * have been possible without all of you.
 21    *
 22    * COPYRIGHT NOTICE:
 23    *
 24    * This software and documentation is provided "as is," and
 25    * the copyright holders and contributing author(s) make no
 26    * representations or warranties, express or implied, including
 27    * but not limited to, warranties of merchantability or fitness
 28    * for any particular purpose or that the use of the software or
 29    * documentation will not infringe any third party patents,
 30    * copyrights, trademarks or other rights.
 31    *
 32    * The copyright holders and contributing author(s) will not be
 33    * liable for any direct, indirect, special or consequential damages
 34    * arising out of any use of the software or documentation, even if
 35    * advised of the possibility of such damage.
 36    *
 37    * Permission is hereby granted to use, copy, modify, and distribute
 38    * this source code, or portions hereof, documentation and executables,
 39    * for any purpose, without fee, subject to the following restrictions:
 40    *
 41    * 1. The origin of this source code must not be misrepresented.
 42    * 2. Altered versions must be plainly marked as such and must
 43    * not be misrepresented as being the original source.
 44    * 3. This Copyright notice may not be removed or altered from any
 45    * source or altered source distribution.
 46    *
 47    * The copyright holders and contributing author(s) specifically
 48    * permit, without fee, and encourage the use of this source code
 49    * as a component for supporting the Hypertext Markup Language in
 50    * commercial products. If you use this source code in a product,
 51    * acknowledgment is not required but would be appreciated.
 52    *
 53    */
 54    package org.w3c.tidy;
 55   
 56    import java.util.HashMap;
 57    import java.util.Map;
 58   
 59   
/**
 * Maps between Java and IANA character encoding names. Also handles the encoding aliases used in tidy c.
 * <p>
 * Lookups are case-insensitive. Before the table is consulted a set of common vendor prefixes (CSIBM, CCSID, IBM,
 * CP, WINDOWS-, ...) is stripped from the requested name, so code-page style names are registered by their numeric
 * part only.
 * </p>
 * @author Fabrizio Giustina
 * @version $Revision: 804 $ ($Author: fgiust $)
 * @see <a href="http://www.iana.org/assignments/character-sets">IANA character sets</a>
 */
public abstract class EncodingNameMapper
{

    /**
     * Index of the IANA name inside the arrays stored in {@link #encodingNameMap}.
     */
    private static final int IANA_NAME = 0;

    /**
     * Index of the Java name inside the arrays stored in {@link #encodingNameMap}.
     */
    private static final int JAVA_NAME = 1;

    /**
     * Map containing uppercase alias - {standard iana, standard java}. Populated once by the static initializer and
     * only read afterwards.
     */
    private static final Map encodingNameMap = new HashMap();

    static
    {
        encodingNameMap.put("ISO-8859-1", new String[]{"ISO-8859-1", "ISO8859_1"});
        encodingNameMap.put("ISO8859_1", new String[]{"ISO-8859-1", "ISO8859_1"});
        encodingNameMap.put("ISO-IR-100", new String[]{"ISO-8859-1", "ISO8859_1"});
        encodingNameMap.put("LATIN1", new String[]{"ISO-8859-1", "ISO8859_1"});
        encodingNameMap.put("CSISOLATIN1", new String[]{"ISO-8859-1", "ISO8859_1"});
        encodingNameMap.put("L1", new String[]{"ISO-8859-1", "ISO8859_1"});
        encodingNameMap.put("819", new String[]{"ISO-8859-1", "ISO8859_1"});

        encodingNameMap.put("US-ASCII", new String[]{"US-ASCII", "ASCII"});
        encodingNameMap.put("ASCII", new String[]{"US-ASCII", "ASCII"});
        encodingNameMap.put("ISO-IR-6", new String[]{"US-ASCII", "ASCII"});
        encodingNameMap.put("CSASCII", new String[]{"US-ASCII", "ASCII"});
        encodingNameMap.put("ISO646-US", new String[]{"US-ASCII", "ASCII"});
        encodingNameMap.put("US", new String[]{"US-ASCII", "ASCII"});
        encodingNameMap.put("367", new String[]{"US-ASCII", "ASCII"});

        encodingNameMap.put("UTF-8", new String[]{"UTF-8", "UTF8"});
        encodingNameMap.put("UTF8", new String[]{"UTF-8", "UTF8"});
        encodingNameMap.put("UTF-16", new String[]{"UTF-16", "Unicode"});
        encodingNameMap.put("UNICODE", new String[]{"UTF-16", "Unicode"});
        encodingNameMap.put("UTF16", new String[]{"UTF-16", "Unicode"}); // tidy

        encodingNameMap.put("UTF-16BE", new String[]{"UTF-16BE", "UnicodeBig"});
        encodingNameMap.put("UNICODEBIG", new String[]{"UTF-16BE", "UnicodeBig"});
        encodingNameMap.put("UTF16-BE", new String[]{"UTF-16BE", "UnicodeBig"});
        encodingNameMap.put("UTF-16LE", new String[]{"UTF-16LE", "UnicodeLittle"});
        encodingNameMap.put("UNICODELITTLE", new String[]{"UTF-16LE", "UnicodeLittle"});
        encodingNameMap.put("UTF16-LE", new String[]{"UTF-16LE", "UnicodeLittle"});
        encodingNameMap.put("UTF16BE", new String[]{"UTF-16BE", "UnicodeBig"}); // tidy
        encodingNameMap.put("UTF16LE", new String[]{"UTF-16LE", "UnicodeLittle"}); // tidy

        encodingNameMap.put("BIG5", new String[]{"BIG5", "Big5"});
        encodingNameMap.put("CSBIG5", new String[]{"BIG5", "Big5"});

        encodingNameMap.put("SJIS", new String[]{"SHIFT_JIS", "SJIS"});
        encodingNameMap.put("SHIFT_JIS", new String[]{"SHIFT_JIS", "SJIS"});
        encodingNameMap.put("CSSHIFTJIS", new String[]{"CSSHIFTJIS", "SJIS"});
        encodingNameMap.put("MS_KANJI", new String[]{"MS_KANJI", "SJIS"});
        encodingNameMap.put("SHIFTJIS", new String[]{"SHIFT_JIS", "SJIS"}); // tidy

        encodingNameMap.put("JIS", new String[]{"ISO-2022-JP", "JIS"});
        encodingNameMap.put("ISO-2022-JP", new String[]{"ISO-2022-JP", "JIS"});
        encodingNameMap.put("CSISO2022JP", new String[]{"CSISO2022JP", "JIS"});
        encodingNameMap.put("ISO2022", new String[]{"ISO-2022-JP", "JIS"}); // tidy

        encodingNameMap.put("ISO2022KR", new String[]{"ISO-2022-KR", "ISO2022KR"});
        encodingNameMap.put("ISO-2022-KR", new String[]{"ISO-2022-KR", "ISO2022KR"});
        encodingNameMap.put("CSISO2022KR", new String[]{"CSISO2022KR", "ISO2022KR"});
        encodingNameMap.put("ISO-2022-CN", new String[]{"ISO-2022-CN", "ISO2022CN"});
        encodingNameMap.put("ISO2022CN", new String[]{"ISO-2022-CN", "ISO2022CN"});

        encodingNameMap.put("MACROMAN", new String[]{"macintosh", "MacRoman"}); // tidy
        encodingNameMap.put("MACINTOSH", new String[]{"macintosh", "MacRoman"});
        encodingNameMap.put("MACINTOSH ROMAN", new String[]{"macintosh", "MacRoman"});

        // code pages: keys are what is left once handlecommonAlias() has stripped the vendor prefix
        encodingNameMap.put("37", new String[]{"IBM037", "CP037"});
        encodingNameMap.put("273", new String[]{"IBM273", "CP273"});
        encodingNameMap.put("277", new String[]{"IBM277", "CP277"});
        encodingNameMap.put("278", new String[]{"IBM278", "CP278"});
        encodingNameMap.put("280", new String[]{"IBM280", "CP280"});
        encodingNameMap.put("284", new String[]{"IBM284", "CP284"});
        encodingNameMap.put("285", new String[]{"IBM285", "CP285"});
        encodingNameMap.put("290", new String[]{"IBM290", "CP290"});
        encodingNameMap.put("297", new String[]{"IBM297", "CP297"});
        encodingNameMap.put("420", new String[]{"IBM420", "CP420"});
        encodingNameMap.put("424", new String[]{"IBM424", "CP424"});
        encodingNameMap.put("437", new String[]{"IBM437", "CP437"});
        encodingNameMap.put("500", new String[]{"IBM500", "CP500"});
        encodingNameMap.put("775", new String[]{"IBM775", "CP775"});
        encodingNameMap.put("850", new String[]{"IBM850", "CP850"});
        encodingNameMap.put("852", new String[]{"IBM852", "CP852"});
        encodingNameMap.put("CSPCP852", new String[]{"IBM852", "CP852"});
        encodingNameMap.put("855", new String[]{"IBM855", "CP855"});
        encodingNameMap.put("857", new String[]{"IBM857", "CP857"});
        encodingNameMap.put("858", new String[]{"IBM00858", "Cp858"});
        encodingNameMap.put("0858", new String[]{"IBM00858", "Cp858"});
        encodingNameMap.put("860", new String[]{"IBM860", "CP860"});
        encodingNameMap.put("861", new String[]{"IBM861", "CP861"});
        encodingNameMap.put("IS", new String[]{"IBM861", "CP861"});
        encodingNameMap.put("862", new String[]{"IBM862", "CP862"});
        encodingNameMap.put("863", new String[]{"IBM863", "CP863"});
        encodingNameMap.put("864", new String[]{"IBM864", "CP864"});
        encodingNameMap.put("865", new String[]{"IBM865", "CP865"});
        encodingNameMap.put("866", new String[]{"IBM866", "CP866"});
        encodingNameMap.put("868", new String[]{"IBM868", "CP868"});
        encodingNameMap.put("AR", new String[]{"IBM868", "CP868"});
        encodingNameMap.put("869", new String[]{"IBM869", "CP869"});
        encodingNameMap.put("GR", new String[]{"IBM869", "CP869"});
        encodingNameMap.put("870", new String[]{"IBM870", "CP870"});
        encodingNameMap.put("871", new String[]{"IBM871", "CP871"});
        encodingNameMap.put("EBCDIC-CP-IS", new String[]{"IBM871", "CP871"});
        encodingNameMap.put("918", new String[]{"CP918", "CP918"});
        encodingNameMap.put("924", new String[]{"IBM00924", "CP924"});
        encodingNameMap.put("0924", new String[]{"IBM00924", "CP924"});
        encodingNameMap.put("1026", new String[]{"IBM1026", "CP1026"});
        encodingNameMap.put("1047", new String[]{"IBM1047", "Cp1047"});
        encodingNameMap.put("1140", new String[]{"IBM01140", "Cp1140"});
        encodingNameMap.put("1141", new String[]{"IBM01141", "Cp1141"});
        encodingNameMap.put("1142", new String[]{"IBM01142", "Cp1142"});
        encodingNameMap.put("1143", new String[]{"IBM01143", "Cp1143"});
        encodingNameMap.put("1144", new String[]{"IBM01144", "Cp1144"});
        encodingNameMap.put("1145", new String[]{"IBM01145", "Cp1145"});
        encodingNameMap.put("1146", new String[]{"IBM01146", "Cp1146"});
        encodingNameMap.put("1147", new String[]{"IBM01147", "Cp1147"});
        encodingNameMap.put("1148", new String[]{"IBM01148", "Cp1148"});
        encodingNameMap.put("1149", new String[]{"IBM01149", "Cp1149"});
        encodingNameMap.put("1250", new String[]{"WINDOWS-1250", "Cp1250"});
        encodingNameMap.put("1251", new String[]{"WINDOWS-1251", "Cp1251"});
        encodingNameMap.put("1252", new String[]{"WINDOWS-1252", "Cp1252"});
        encodingNameMap.put("WIN1252", new String[]{"WINDOWS-1252", "Cp1252"}); // tidy
        encodingNameMap.put("1253", new String[]{"WINDOWS-1253", "Cp1253"});
        encodingNameMap.put("1254", new String[]{"WINDOWS-1254", "Cp1254"});
        encodingNameMap.put("1255", new String[]{"WINDOWS-1255", "Cp1255"});
        encodingNameMap.put("1256", new String[]{"WINDOWS-1256", "Cp1256"});
        encodingNameMap.put("1257", new String[]{"WINDOWS-1257", "Cp1257"});
        encodingNameMap.put("1258", new String[]{"WINDOWS-1258", "Cp1258"});

        encodingNameMap.put("EUC-JP", new String[]{"EUC-JP", "EUCJIS"});
        encodingNameMap.put("EUCJIS", new String[]{"EUC-JP", "EUCJIS"});
        encodingNameMap.put("EUC-KR", new String[]{"EUC-KR", "KSC5601"});
        encodingNameMap.put("KSC5601", new String[]{"EUC-KR", "KSC5601"});
        encodingNameMap.put("GB2312", new String[]{"GB2312", "GB2312"});
        encodingNameMap.put("CSGB2312", new String[]{"GB2312", "GB2312"});
        encodingNameMap.put("X0201", new String[]{"X0201", "JIS0201"});
        encodingNameMap.put("JIS0201", new String[]{"X0201", "JIS0201"});
        encodingNameMap.put("X0208", new String[]{"X0208", "JIS0208"});
        encodingNameMap.put("ISO-IR-87", new String[]{"ISO-IR-87", "JIS0208"});
        // JIS0208 used to be registered twice; only the later (ISO-IR-87) entry ever took effect, so that one is kept
        encodingNameMap.put("JIS0208", new String[]{"ISO-IR-87", "JIS0208"});
        encodingNameMap.put("X0212", new String[]{"X0212", "JIS0212"});
        encodingNameMap.put("JIS0212", new String[]{"X0212", "JIS0212"});
        encodingNameMap.put("ISO-IR-159", new String[]{"X0212", "JIS0212"});
        encodingNameMap.put("GB18030", new String[]{"GB18030", "GB18030"});

        encodingNameMap.put("936", new String[]{"GBK", "GBK"});
        encodingNameMap.put("MS936", new String[]{"GBK", "GBK"});

        encodingNameMap.put("MS932", new String[]{"WINDOWS-31J", "MS932"});
        encodingNameMap.put("31J", new String[]{"WINDOWS-31J", "MS932"});
        encodingNameMap.put("CSWINDOWS31J", new String[]{"WINDOWS-31J", "MS932"});
        encodingNameMap.put("TIS-620", new String[]{"TIS-620", "TIS620"});
        encodingNameMap.put("TIS620", new String[]{"TIS-620", "TIS620"});

        encodingNameMap.put("ISO-8859-2", new String[]{"ISO-8859-2", "ISO8859_2"});
        encodingNameMap.put("ISO8859_2", new String[]{"ISO-8859-2", "ISO8859_2"});
        encodingNameMap.put("ISO-IR-101", new String[]{"ISO-8859-2", "ISO8859_2"});
        encodingNameMap.put("LATIN2", new String[]{"ISO-8859-2", "ISO8859_2"});
        encodingNameMap.put("L2", new String[]{"ISO-8859-2", "ISO8859_2"});

        encodingNameMap.put("ISO-8859-3", new String[]{"ISO-8859-3", "ISO8859_3"});
        encodingNameMap.put("ISO8859_3", new String[]{"ISO-8859-3", "ISO8859_3"});
        encodingNameMap.put("ISO-IR-109", new String[]{"ISO-8859-3", "ISO8859_3"});
        encodingNameMap.put("LATIN3", new String[]{"ISO-8859-3", "ISO8859_3"});
        encodingNameMap.put("L3", new String[]{"ISO-8859-3", "ISO8859_3"});

        encodingNameMap.put("ISO-8859-4", new String[]{"ISO-8859-4", "ISO8859_4"});
        encodingNameMap.put("ISO8859_4", new String[]{"ISO-8859-4", "ISO8859_4"});
        encodingNameMap.put("ISO-IR-110", new String[]{"ISO-8859-4", "ISO8859_4"});
        // was a second ISO-IR-110 entry; LATIN4 follows the pattern of the LATIN2/LATIN3 groups
        encodingNameMap.put("LATIN4", new String[]{"ISO-8859-4", "ISO8859_4"});
        encodingNameMap.put("L4", new String[]{"ISO-8859-4", "ISO8859_4"});

        encodingNameMap.put("ISO-8859-5", new String[]{"ISO-8859-5", "ISO8859_5"});
        encodingNameMap.put("ISO8859_5", new String[]{"ISO-8859-5", "ISO8859_5"});
        encodingNameMap.put("ISO-IR-144", new String[]{"ISO-8859-5", "ISO8859_5"});
        encodingNameMap.put("CYRILLIC", new String[]{"ISO-8859-5", "ISO8859_5"});

        encodingNameMap.put("ISO-8859-6", new String[]{"ISO-8859-6", "ISO8859_6"});
        encodingNameMap.put("ISO8859_6", new String[]{"ISO-8859-6", "ISO8859_6"});
        encodingNameMap.put("ISO-IR-127", new String[]{"ISO-8859-6", "ISO8859_6"});
        encodingNameMap.put("ARABIC", new String[]{"ISO-8859-6", "ISO8859_6"});

        encodingNameMap.put("ISO-8859-7", new String[]{"ISO-8859-7", "ISO8859_7"});
        encodingNameMap.put("ISO8859_7", new String[]{"ISO-8859-7", "ISO8859_7"});
        encodingNameMap.put("ISO-IR-126", new String[]{"ISO-8859-7", "ISO8859_7"});
        encodingNameMap.put("GREEK", new String[]{"ISO-8859-7", "ISO8859_7"});

        encodingNameMap.put("ISO-8859-8", new String[]{"ISO-8859-8", "ISO8859_8"});
        encodingNameMap.put("ISO8859_8", new String[]{"ISO-8859-8", "ISO8859_8"});
        encodingNameMap.put("ISO-8859-8-I", new String[]{"ISO-8859-8", "ISO8859_8"});
        encodingNameMap.put("ISO-IR-138", new String[]{"ISO-8859-8", "ISO8859_8"});
        encodingNameMap.put("HEBREW", new String[]{"ISO-8859-8", "ISO8859_8"});
        // csISOLatinHebrew is an alias of ISO-8859-8 (it used to be filed under ISO-8859-9 by mistake)
        encodingNameMap.put("CSISOLATINHEBREW", new String[]{"ISO-8859-8", "ISO8859_8"});

        // the Java name for ISO-8859-9 is ISO8859_9; the previous entries pointed at ISO8859_8 and overwrote its key
        encodingNameMap.put("ISO-8859-9", new String[]{"ISO-8859-9", "ISO8859_9"});
        encodingNameMap.put("ISO8859_9", new String[]{"ISO-8859-9", "ISO8859_9"});
        encodingNameMap.put("ISO-IR-148", new String[]{"ISO-8859-9", "ISO8859_9"});
        encodingNameMap.put("LATIN5", new String[]{"ISO-8859-9", "ISO8859_9"});
        encodingNameMap.put("CSISOLATIN5", new String[]{"ISO-8859-9", "ISO8859_9"});
        encodingNameMap.put("L5", new String[]{"ISO-8859-9", "ISO8859_9"});

        encodingNameMap.put("ISO-8859-15", new String[]{"ISO-8859-15", "ISO8859_15"});
        encodingNameMap.put("ISO8859_15", new String[]{"ISO-8859-15", "ISO8859_15"});

        encodingNameMap.put("KOI8-R", new String[]{"KOI8-R", "KOI8_R"});
        // the Java name resolves to the preferred IANA name, like every other Java-name key in this table
        encodingNameMap.put("KOI8_R", new String[]{"KOI8-R", "KOI8_R"});
        encodingNameMap.put("CSKOI8R", new String[]{"CSKOI8R", "KOI8_R"});
    }

    /**
     * Convert a Java character encoding name to its IANA equivalent.
     * @param encoding java encoding name or alias (case-insensitive), may be <code>null</code>
     * @return iana equivalent or null if no match is found (or if <code>encoding</code> is null).
     */
    public static String toIana(String encoding)
    {
        return lookup(encoding, IANA_NAME);
    }

    /**
     * "Fix" the name for common alias to reduce the number of entries needed in the hashmap. It upper-cases the name,
     * removes CSIBM, CCSID, IBM-, IBM0, CP-0, IBM, CP0, CP-, CP, WINDOWS- prefixes from it and rewrites a leading
     * "ISO_" to "ISO-". At most one of these rules is applied, the first that matches in the order listed.
     * @param encoding encoding name, must not be null
     * @return "fixed" encoding, usable as a key for the alias map.
     */
    private static String handlecommonAlias(String encoding)
    {
        // Fixed locale: with a default locale such as Turkish, "iso" would upper-case to "\u0130SO" and never match.
        String key = encoding.toUpperCase(java.util.Locale.ENGLISH);

        // handle common alias; longer prefixes are tested before the shorter ones they contain
        if (key.startsWith("CSIBM") || key.startsWith("CCSID"))
        {
            key = key.substring(5);
        }
        else if (key.startsWith("IBM-") || key.startsWith("IBM0") || key.startsWith("CP-0"))
        {
            key = key.substring(4);
        }
        else if (key.startsWith("IBM") || key.startsWith("CP0") || key.startsWith("CP-"))
        {
            key = key.substring(3);
        }
        else if (key.startsWith("CP"))
        {
            key = key.substring(2);
        }
        else if (key.startsWith("WINDOWS-"))
        {
            key = key.substring(8);
        }
        else if (key.startsWith("ISO_"))
        {
            key = "ISO-" + key.substring(4);
        }

        return key;
    }

    /**
     * Converts an encoding name to the standard java name. Handles IANA names, legacy names used in tidy and different
     * java encoding alias. See http://www.iana.org/assignments/character-sets.
     * @param encoding IANA encoding name or alias (case-insensitive), may be <code>null</code>
     * @return java equivalent or null if no match is found (or if <code>encoding</code> is null).
     */
    public static String toJava(String encoding)
    {
        return lookup(encoding, JAVA_NAME);
    }

    /**
     * Shared lookup used by {@link #toIana(String)} and {@link #toJava(String)}.
     * @param encoding encoding name or alias, may be <code>null</code>
     * @param index which element of the stored pair to return: {@link #IANA_NAME} or {@link #JAVA_NAME}
     * @return the requested name, or null if <code>encoding</code> is null or unknown.
     */
    private static String lookup(String encoding, int index)
    {
        if (encoding == null)
        {
            return null;
        }

        String[] values = (String[]) encodingNameMap.get(handlecommonAlias(encoding));
        if (values == null)
        {
            return null;
        }

        return values[index];
    }
}