1 |
| |
2 |
| |
3 |
| |
4 |
| |
5 |
| |
6 |
| |
7 |
| |
8 |
| |
9 |
| |
10 |
| |
11 |
| |
12 |
| |
13 |
| |
14 |
| |
15 |
| |
16 |
| |
17 |
| |
18 |
| |
19 |
| |
20 |
| |
21 |
| |
22 |
| |
23 |
| |
24 |
| |
25 |
| |
26 |
| |
27 |
| |
28 |
| |
29 |
| |
30 |
| |
31 |
| |
32 |
| |
33 |
| |
34 |
| |
35 |
| |
36 |
| |
37 |
| |
38 |
| |
39 |
| |
40 |
| |
41 |
| |
42 |
| |
43 |
| |
44 |
| |
45 |
| |
46 |
| |
47 |
| |
48 |
| |
49 |
| |
50 |
| |
51 |
| |
52 |
| |
53 |
| |
54 |
| package org.w3c.tidy; |
55 |
| |
56 |
| import java.util.HashMap; |
57 |
| import java.util.Map; |
58 |
| |
59 |
| |
60 |
| |
61 |
| |
62 |
| |
63 |
| |
64 |
| |
65 |
| |
66 |
/**
 * Translates character-encoding names between their IANA form and the name
 * used by the Java platform.
 * <p>
 * Lookups are case-insensitive and tolerate the usual code-page spellings
 * (<code>IBM037</code>, <code>IBM-037</code>, <code>cp037</code>,
 * <code>csIBM037</code>, <code>windows-1252</code>, <code>ISO_8859-1</code>, ...).
 * The alias table is filled once in the static initializer and is only read
 * afterwards.
 */
public abstract class EncodingNameMapper
{

    /**
     * Alias table. Key: upper-case alias (String). Value: <code>String[2]</code>
     * holding the IANA name at index 0 and the Java name at index 1.
     */
    private static final Map encodingNameMap = new HashMap();

    static
    {
        register("ISO-8859-1", "ISO-8859-1", "ISO8859_1");
        register("ISO8859_1", "ISO-8859-1", "ISO8859_1");
        register("ISO-IR-100", "ISO-8859-1", "ISO8859_1");
        register("LATIN1", "ISO-8859-1", "ISO8859_1");
        register("CSISOLATIN1", "ISO-8859-1", "ISO8859_1");
        register("L1", "ISO-8859-1", "ISO8859_1");
        register("819", "ISO-8859-1", "ISO8859_1");

        register("US-ASCII", "US-ASCII", "ASCII");
        register("ASCII", "US-ASCII", "ASCII");
        register("ISO-IR-6", "US-ASCII", "ASCII");
        register("CSASCII", "US-ASCII", "ASCII");
        register("ISO646-US", "US-ASCII", "ASCII");
        register("US", "US-ASCII", "ASCII");
        register("367", "US-ASCII", "ASCII");

        register("UTF-8", "UTF-8", "UTF8");
        register("UTF8", "UTF-8", "UTF8");
        register("UTF-16", "UTF-16", "Unicode");
        register("UNICODE", "UTF-16", "Unicode");
        register("UTF16", "UTF-16", "Unicode");

        register("UTF-16BE", "UTF-16BE", "UnicodeBig");
        register("UNICODEBIG", "UTF-16BE", "UnicodeBig");
        register("UTF16-BE", "UTF-16BE", "UnicodeBig");
        register("UTF-16LE", "UTF-16LE", "UnicodeLittle");
        register("UNICODELITTLE", "UTF-16LE", "UnicodeLittle");
        register("UTF16-LE", "UTF-16LE", "UnicodeLittle");
        register("UTF16BE", "UTF-16BE", "UnicodeBig");
        register("UTF16LE", "UTF-16LE", "UnicodeLittle");

        register("BIG5", "BIG5", "Big5");
        register("CSBIG5", "BIG5", "Big5");

        register("SJIS", "SHIFT_JIS", "SJIS");
        register("SHIFT_JIS", "SHIFT_JIS", "SJIS");
        register("CSSHIFTJIS", "CSSHIFTJIS", "SJIS");
        register("MS_KANJI", "MS_KANJI", "SJIS");
        register("SHIFTJIS", "SHIFT_JIS", "SJIS");

        register("JIS", "ISO-2022-JP", "JIS");
        register("ISO-2022-JP", "ISO-2022-JP", "JIS");
        register("CSISO2022JP", "CSISO2022JP", "JIS");
        register("ISO2022", "ISO-2022-JP", "JIS");

        register("ISO2022KR", "ISO-2022-KR", "ISO2022KR");
        register("ISO-2022-KR", "ISO-2022-KR", "ISO2022KR");
        register("CSISO2022KR", "CSISO2022KR", "ISO2022KR");
        register("ISO-2022-CN", "ISO-2022-CN", "ISO2022CN");
        register("ISO2022CN", "ISO-2022-CN", "ISO2022CN");

        register("MACROMAN", "macintosh", "MacRoman");
        register("MACINTOSH", "macintosh", "MacRoman");
        register("MACINTOSH ROMAN", "macintosh", "MacRoman");

        // Code pages are keyed by their bare number; handlecommonAlias() strips
        // the IBM/CP/WINDOWS style prefixes before the lookup.
        register("37", "IBM037", "CP037");
        register("273", "IBM273", "CP273");
        register("277", "IBM277", "CP277");
        register("278", "IBM278", "CP278");
        register("280", "IBM280", "CP280");
        register("284", "IBM284", "CP284");
        register("285", "IBM285", "CP285");
        register("290", "IBM290", "CP290");
        register("297", "IBM297", "CP297");
        register("420", "IBM420", "CP420");
        register("424", "IBM424", "CP424");
        register("437", "IBM437", "CP437");
        register("500", "IBM500", "CP500");
        register("775", "IBM775", "CP775");
        register("850", "IBM850", "CP850");
        register("852", "IBM852", "CP852");
        register("CSPCP852", "IBM852", "CP852");
        register("855", "IBM855", "CP855");
        register("857", "IBM857", "CP857");
        register("858", "IBM00858", "Cp858");
        register("0858", "IBM00858", "Cp858");
        register("860", "IBM860", "CP860");
        register("861", "IBM861", "CP861");
        register("IS", "IBM861", "CP861");
        register("862", "IBM862", "CP862");
        register("863", "IBM863", "CP863");
        register("864", "IBM864", "CP864");
        register("865", "IBM865", "CP865");
        register("866", "IBM866", "CP866");
        register("868", "IBM868", "CP868");
        register("AR", "IBM868", "CP868");
        register("869", "IBM869", "CP869");
        register("GR", "IBM869", "CP869");
        register("870", "IBM870", "CP870");
        register("871", "IBM871", "CP871");
        register("EBCDIC-CP-IS", "IBM871", "CP871");
        register("918", "CP918", "CP918");
        register("924", "IBM00924", "CP924");
        register("0924", "IBM00924", "CP924");
        register("1026", "IBM1026", "CP1026");
        register("1047", "IBM1047", "Cp1047");
        register("1140", "IBM01140", "Cp1140");
        register("1141", "IBM01141", "Cp1141");
        register("1142", "IBM01142", "Cp1142");
        register("1143", "IBM01143", "Cp1143");
        register("1144", "IBM01144", "Cp1144");
        register("1145", "IBM01145", "Cp1145");
        register("1146", "IBM01146", "Cp1146");
        register("1147", "IBM01147", "Cp1147");
        register("1148", "IBM01148", "Cp1148");
        register("1149", "IBM01149", "Cp1149");
        register("1250", "WINDOWS-1250", "Cp1250");
        register("1251", "WINDOWS-1251", "Cp1251");
        register("1252", "WINDOWS-1252", "Cp1252");
        register("WIN1252", "WINDOWS-1252", "Cp1252");
        register("1253", "WINDOWS-1253", "Cp1253");
        register("1254", "WINDOWS-1254", "Cp1254");
        register("1255", "WINDOWS-1255", "Cp1255");
        register("1256", "WINDOWS-1256", "Cp1256");
        register("1257", "WINDOWS-1257", "Cp1257");
        register("1258", "WINDOWS-1258", "Cp1258");

        register("EUC-JP", "EUC-JP", "EUCJIS");
        register("EUCJIS", "EUC-JP", "EUCJIS");
        register("EUC-KR", "EUC-KR", "KSC5601");
        register("KSC5601", "EUC-KR", "KSC5601");
        register("GB2312", "GB2312", "GB2312");
        register("CSGB2312", "GB2312", "GB2312");
        register("X0201", "X0201", "JIS0201");
        register("JIS0201", "X0201", "JIS0201");
        register("X0208", "X0208", "JIS0208");
        register("ISO-IR-87", "ISO-IR-87", "JIS0208");
        // JIS0208 used to be registered twice (first as X0208, then as ISO-IR-87);
        // the later registration always won, so that effective value is kept.
        register("JIS0208", "ISO-IR-87", "JIS0208");
        register("X0212", "X0212", "JIS0212");
        register("JIS0212", "X0212", "JIS0212");
        register("ISO-IR-159", "X0212", "JIS0212");
        register("GB18030", "GB18030", "GB18030");

        register("936", "GBK", "GBK");
        register("MS936", "GBK", "GBK");

        register("MS932", "WINDOWS-31J", "MS932");
        register("31J", "WINDOWS-31J", "MS932");
        register("CSWINDOWS31J", "WINDOWS-31J", "MS932");
        register("TIS-620", "TIS-620", "TIS620");
        register("TIS620", "TIS-620", "TIS620");

        register("ISO-8859-2", "ISO-8859-2", "ISO8859_2");
        register("ISO8859_2", "ISO-8859-2", "ISO8859_2");
        register("ISO-IR-101", "ISO-8859-2", "ISO8859_2");
        register("LATIN2", "ISO-8859-2", "ISO8859_2");
        register("L2", "ISO-8859-2", "ISO8859_2");

        register("ISO-8859-3", "ISO-8859-3", "ISO8859_3");
        register("ISO8859_3", "ISO-8859-3", "ISO8859_3");
        register("ISO-IR-109", "ISO-8859-3", "ISO8859_3");
        register("LATIN3", "ISO-8859-3", "ISO8859_3");
        register("L3", "ISO-8859-3", "ISO8859_3");

        register("ISO-8859-4", "ISO-8859-4", "ISO8859_4");
        register("ISO8859_4", "ISO-8859-4", "ISO8859_4");
        register("ISO-IR-110", "ISO-8859-4", "ISO8859_4");
        // Replaces a duplicated ISO-IR-110 entry; LATIN4 was the alias missing
        // from this group (compare LATIN2/LATIN3/LATIN5).
        register("LATIN4", "ISO-8859-4", "ISO8859_4");
        register("L4", "ISO-8859-4", "ISO8859_4");

        register("ISO-8859-5", "ISO-8859-5", "ISO8859_5");
        register("ISO8859_5", "ISO-8859-5", "ISO8859_5");
        register("ISO-IR-144", "ISO-8859-5", "ISO8859_5");
        register("CYRILLIC", "ISO-8859-5", "ISO8859_5");

        register("ISO-8859-6", "ISO-8859-6", "ISO8859_6");
        register("ISO8859_6", "ISO-8859-6", "ISO8859_6");
        register("ISO-IR-127", "ISO-8859-6", "ISO8859_6");
        register("ARABIC", "ISO-8859-6", "ISO8859_6");

        register("ISO-8859-7", "ISO-8859-7", "ISO8859_7");
        register("ISO8859_7", "ISO-8859-7", "ISO8859_7");
        register("ISO-IR-126", "ISO-8859-7", "ISO8859_7");
        register("GREEK", "ISO-8859-7", "ISO8859_7");

        register("ISO-8859-8", "ISO-8859-8", "ISO8859_8");
        register("ISO8859_8", "ISO-8859-8", "ISO8859_8");
        register("ISO-8859-8-I", "ISO-8859-8", "ISO8859_8");
        register("ISO-IR-138", "ISO-8859-8", "ISO8859_8");
        register("HEBREW", "ISO-8859-8", "ISO8859_8");
        // csISOLatinHebrew is an alias of ISO-8859-8, not of ISO-8859-9.
        register("CSISOLATINHEBREW", "ISO-8859-8", "ISO8859_8");

        // The ISO-8859-9 entries previously reused the ISO8859_8 Java name/key,
        // which both broke ISO-8859-9 and clobbered the ISO8859_8 entry above.
        register("ISO-8859-9", "ISO-8859-9", "ISO8859_9");
        register("ISO8859_9", "ISO-8859-9", "ISO8859_9");
        register("ISO-IR-148", "ISO-8859-9", "ISO8859_9");
        register("LATIN5", "ISO-8859-9", "ISO8859_9");
        register("CSISOLATIN5", "ISO-8859-9", "ISO8859_9");
        register("L5", "ISO-8859-9", "ISO8859_9");

        register("ISO-8859-15", "ISO-8859-15", "ISO8859_15");
        register("ISO8859_15", "ISO-8859-15", "ISO8859_15");

        register("KOI8-R", "KOI8-R", "KOI8_R");
        register("KOI8_R", "CSKOI8R", "KOI8_R");
        register("CSKOI8R", "CSKOI8R", "KOI8_R");
    }

    /**
     * Adds one alias to the table.
     * @param alias upper-case alias used as lookup key
     * @param ianaName IANA name returned by {@link #toIana(String)}
     * @param javaName Java name returned by {@link #toJava(String)}
     */
    private static void register(String alias, String ianaName, String javaName)
    {
        encodingNameMap.put(alias, new String[]{ianaName, javaName});
    }

    /**
     * Returns the IANA name for the given encoding.
     * @param encoding encoding name in any supported spelling, may be <code>null</code>
     * @return the IANA name, or <code>null</code> if the encoding is <code>null</code> or unknown
     */
    public static String toIana(String encoding)
    {
        return lookup(encoding, 0);
    }

    /**
     * Reduces an encoding name to the key used in the alias table: upper-cases
     * it and strips the common code-page prefixes (<code>CSIBM</code>,
     * <code>CCSID</code>, <code>IBM-</code>, <code>IBM</code>, <code>CP-</code>,
     * <code>CP</code>, <code>WINDOWS-</code>), and turns a leading
     * <code>ISO_</code> into <code>ISO-</code>.
     * @param encoding encoding name, must not be <code>null</code>
     * @return lookup key for the alias table
     */
    private static String handlecommonAlias(String encoding)
    {
        // Fixed locale: with a Turkish default locale "i" would upper-case to a
        // dotted capital I and names such as "iso-8859-1" would never match.
        String key = encoding.toUpperCase(java.util.Locale.ENGLISH);

        if (key.startsWith("CSIBM") || key.startsWith("CCSID"))
        {
            return stripLeadingZeros(key.substring(5));
        }
        if (key.startsWith("IBM-"))
        {
            return stripLeadingZeros(key.substring(4));
        }
        if (key.startsWith("IBM") || key.startsWith("CP-"))
        {
            return stripLeadingZeros(key.substring(3));
        }
        if (key.startsWith("CP"))
        {
            return stripLeadingZeros(key.substring(2));
        }
        if (key.startsWith("WINDOWS-"))
        {
            return key.substring(8);
        }
        if (key.startsWith("ISO_"))
        {
            return "ISO-" + key.substring(4);
        }

        return key;
    }

    /**
     * Removes zero padding from a code-page number, so that
     * <code>037</code>, <code>0858</code> and <code>00858</code> resolve to the
     * same table entries as <code>37</code> and <code>858</code>.
     * @param value text remaining after a code-page prefix was removed
     * @return <code>value</code> without leading <code>'0'</code> characters
     */
    private static String stripLeadingZeros(String value)
    {
        int start = 0;
        while (start < value.length() && value.charAt(start) == '0')
        {
            start++;
        }
        return value.substring(start);
    }

    /**
     * Shared lookup behind {@link #toIana(String)} and {@link #toJava(String)}.
     * @param encoding encoding name, may be <code>null</code>
     * @param index 0 for the IANA name, 1 for the Java name
     * @return the requested name, or <code>null</code> if the encoding is <code>null</code> or unknown
     */
    private static String lookup(String encoding, int index)
    {
        if (encoding == null)
        {
            return null;
        }

        String[] names = (String[]) encodingNameMap.get(handlecommonAlias(encoding));
        return names == null ? null : names[index];
    }

    /**
     * Returns the Java name for the given encoding.
     * @param encoding encoding name in any supported spelling, may be <code>null</code>
     * @return the Java name, or <code>null</code> if the encoding is <code>null</code> or unknown
     */
    public static String toJava(String encoding)
    {
        return lookup(encoding, 1);
    }
}