1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 import java.util.Hashtable;
57 import java.util.Iterator;
58 import java.util.Map;
59
60
61 /**
62 * Entity hash table.
63 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
64 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
65 * @author Fabrizio Giustina
66 * @version $Revision: 779 $ ($Author: fgiust $)
67 */
68 public final class EntityTable
69 {
70
71 /**
72 * the default entity table.
73 */
74 private static EntityTable defaultEntityTable;
75
76 /**
77 * Known entities.
78 */
79 private static Entity[] entities = {
80 new Entity("nbsp", 160),
81 new Entity("iexcl", 161),
82 new Entity("cent", 162),
83 new Entity("pound", 163),
84 new Entity("curren", 164),
85 new Entity("yen", 165),
86 new Entity("brvbar", 166),
87 new Entity("sect", 167),
88 new Entity("uml", 168),
89 new Entity("copy", 169),
90 new Entity("ordf", 170),
91 new Entity("laquo", 171),
92 new Entity("not", 172),
93 new Entity("shy", 173),
94 new Entity("reg", 174),
95 new Entity("macr", 175),
96 new Entity("deg", 176),
97 new Entity("plusmn", 177),
98 new Entity("sup2", 178),
99 new Entity("sup3", 179),
100 new Entity("acute", 180),
101 new Entity("micro", 181),
102 new Entity("para", 182),
103 new Entity("middot", 183),
104 new Entity("cedil", 184),
105 new Entity("sup1", 185),
106 new Entity("ordm", 186),
107 new Entity("raquo", 187),
108 new Entity("frac14", 188),
109 new Entity("frac12", 189),
110 new Entity("frac34", 190),
111 new Entity("iquest", 191),
112 new Entity("Agrave", 192),
113 new Entity("Aacute", 193),
114 new Entity("Acirc", 194),
115 new Entity("Atilde", 195),
116 new Entity("Auml", 196),
117 new Entity("Aring", 197),
118 new Entity("AElig", 198),
119 new Entity("Ccedil", 199),
120 new Entity("Egrave", 200),
121 new Entity("Eacute", 201),
122 new Entity("Ecirc", 202),
123 new Entity("Euml", 203),
124 new Entity("Igrave", 204),
125 new Entity("Iacute", 205),
126 new Entity("Icirc", 206),
127 new Entity("Iuml", 207),
128 new Entity("ETH", 208),
129 new Entity("Ntilde", 209),
130 new Entity("Ograve", 210),
131 new Entity("Oacute", 211),
132 new Entity("Ocirc", 212),
133 new Entity("Otilde", 213),
134 new Entity("Ouml", 214),
135 new Entity("times", 215),
136 new Entity("Oslash", 216),
137 new Entity("Ugrave", 217),
138 new Entity("Uacute", 218),
139 new Entity("Ucirc", 219),
140 new Entity("Uuml", 220),
141 new Entity("Yacute", 221),
142 new Entity("THORN", 222),
143 new Entity("szlig", 223),
144 new Entity("agrave", 224),
145 new Entity("aacute", 225),
146 new Entity("acirc", 226),
147 new Entity("atilde", 227),
148 new Entity("auml", 228),
149 new Entity("aring", 229),
150 new Entity("aelig", 230),
151 new Entity("ccedil", 231),
152 new Entity("egrave", 232),
153 new Entity("eacute", 233),
154 new Entity("ecirc", 234),
155 new Entity("euml", 235),
156 new Entity("igrave", 236),
157 new Entity("iacute", 237),
158 new Entity("icirc", 238),
159 new Entity("iuml", 239),
160 new Entity("eth", 240),
161 new Entity("ntilde", 241),
162 new Entity("ograve", 242),
163 new Entity("oacute", 243),
164 new Entity("ocirc", 244),
165 new Entity("otilde", 245),
166 new Entity("ouml", 246),
167 new Entity("divide", 247),
168 new Entity("oslash", 248),
169 new Entity("ugrave", 249),
170 new Entity("uacute", 250),
171 new Entity("ucirc", 251),
172 new Entity("uuml", 252),
173 new Entity("yacute", 253),
174 new Entity("thorn", 254),
175 new Entity("yuml", 255),
176 new Entity("fnof", 402),
177 new Entity("Alpha", 913),
178 new Entity("Beta", 914),
179 new Entity("Gamma", 915),
180 new Entity("Delta", 916),
181 new Entity("Epsilon", 917),
182 new Entity("Zeta", 918),
183 new Entity("Eta", 919),
184 new Entity("Theta", 920),
185 new Entity("Iota", 921),
186 new Entity("Kappa", 922),
187 new Entity("Lambda", 923),
188 new Entity("Mu", 924),
189 new Entity("Nu", 925),
190 new Entity("Xi", 926),
191 new Entity("Omicron", 927),
192 new Entity("Pi", 928),
193 new Entity("Rho", 929),
194 new Entity("Sigma", 931),
195 new Entity("Tau", 932),
196 new Entity("Upsilon", 933),
197 new Entity("Phi", 934),
198 new Entity("Chi", 935),
199 new Entity("Psi", 936),
200 new Entity("Omega", 937),
201 new Entity("alpha", 945),
202 new Entity("beta", 946),
203 new Entity("gamma", 947),
204 new Entity("delta", 948),
205 new Entity("epsilon", 949),
206 new Entity("zeta", 950),
207 new Entity("eta", 951),
208 new Entity("theta", 952),
209 new Entity("iota", 953),
210 new Entity("kappa", 954),
211 new Entity("lambda", 955),
212 new Entity("mu", 956),
213 new Entity("nu", 957),
214 new Entity("xi", 958),
215 new Entity("omicron", 959),
216 new Entity("pi", 960),
217 new Entity("rho", 961),
218 new Entity("sigmaf", 962),
219 new Entity("sigma", 963),
220 new Entity("tau", 964),
221 new Entity("upsilon", 965),
222 new Entity("phi", 966),
223 new Entity("chi", 967),
224 new Entity("psi", 968),
225 new Entity("omega", 969),
226 new Entity("thetasym", 977),
227 new Entity("upsih", 978),
228 new Entity("piv", 982),
229 new Entity("bull", 8226),
230 new Entity("hellip", 8230),
231 new Entity("prime", 8242),
232 new Entity("Prime", 8243),
233 new Entity("oline", 8254),
234 new Entity("frasl", 8260),
235 new Entity("weierp", 8472),
236 new Entity("image", 8465),
237 new Entity("real", 8476),
238 new Entity("trade", 8482),
239 new Entity("alefsym", 8501),
240 new Entity("larr", 8592),
241 new Entity("uarr", 8593),
242 new Entity("rarr", 8594),
243 new Entity("darr", 8595),
244 new Entity("harr", 8596),
245 new Entity("crarr", 8629),
246 new Entity("lArr", 8656),
247 new Entity("uArr", 8657),
248 new Entity("rArr", 8658),
249 new Entity("dArr", 8659),
250 new Entity("hArr", 8660),
251 new Entity("forall", 8704),
252 new Entity("part", 8706),
253 new Entity("exist", 8707),
254 new Entity("empty", 8709),
255 new Entity("nabla", 8711),
256 new Entity("isin", 8712),
257 new Entity("notin", 8713),
258 new Entity("ni", 8715),
259 new Entity("prod", 8719),
260 new Entity("sum", 8721),
261 new Entity("minus", 8722),
262 new Entity("lowast", 8727),
263 new Entity("radic", 8730),
264 new Entity("prop", 8733),
265 new Entity("infin", 8734),
266 new Entity("ang", 8736),
267 new Entity("and", 8743),
268 new Entity("or", 8744),
269 new Entity("cap", 8745),
270 new Entity("cup", 8746),
271 new Entity("int", 8747),
272 new Entity("there4", 8756),
273 new Entity("sim", 8764),
274 new Entity("cong", 8773),
275 new Entity("asymp", 8776),
276 new Entity("ne", 8800),
277 new Entity("equiv", 8801),
278 new Entity("le", 8804),
279 new Entity("ge", 8805),
280 new Entity("sub", 8834),
281 new Entity("sup", 8835),
282 new Entity("nsub", 8836),
283 new Entity("sube", 8838),
284 new Entity("supe", 8839),
285 new Entity("oplus", 8853),
286 new Entity("otimes", 8855),
287 new Entity("perp", 8869),
288 new Entity("sdot", 8901),
289 new Entity("lceil", 8968),
290 new Entity("rceil", 8969),
291 new Entity("lfloor", 8970),
292 new Entity("rfloor", 8971),
293 new Entity("lang", 9001),
294 new Entity("rang", 9002),
295 new Entity("loz", 9674),
296 new Entity("spades", 9824),
297 new Entity("clubs", 9827),
298 new Entity("hearts", 9829),
299 new Entity("diams", 9830),
300 new Entity("quot", 34),
301 new Entity("amp", 38),
302 new Entity("lt", 60),
303 new Entity("gt", 62),
304 new Entity("OElig", 338),
305 new Entity("oelig", 339),
306 new Entity("Scaron", 352),
307 new Entity("scaron", 353),
308 new Entity("Yuml", 376),
309 new Entity("circ", 710),
310 new Entity("tilde", 732),
311 new Entity("ensp", 8194),
312 new Entity("emsp", 8195),
313 new Entity("thinsp", 8201),
314 new Entity("zwnj", 8204),
315 new Entity("zwj", 8205),
316 new Entity("lrm", 8206),
317 new Entity("rlm", 8207),
318 new Entity("ndash", 8211),
319 new Entity("mdash", 8212),
320 new Entity("lsquo", 8216),
321 new Entity("rsquo", 8217),
322 new Entity("sbquo", 8218),
323 new Entity("ldquo", 8220),
324 new Entity("rdquo", 8221),
325 new Entity("bdquo", 8222),
326 new Entity("dagger", 8224),
327 new Entity("Dagger", 8225),
328 new Entity("permil", 8240),
329 new Entity("lsaquo", 8249),
330 new Entity("rsaquo", 8250),
331 new Entity("euro", 8364)};
332
333 /**
334 * Entity map.
335 */
336 private Map entityHashtable = new Hashtable();
337
338 /**
339 * use getDefaultEntityTable to get an entity table instance.
340 */
341 private EntityTable()
342 {
343 super();
344 }
345
346 /**
347 * installs an entity.
348 * @param ent entity
349 * @return installed Entity
350 */
351 private Entity install(Entity ent)
352 {
353 return (Entity) this.entityHashtable.put(ent.getName(), ent);
354 }
355
356 /**
357 * Lookup an entity by its name.
358 * @param name entity name
359 * @return entity
360 */
361 public Entity lookup(String name)
362 {
363 return (Entity) this.entityHashtable.get(name);
364 }
365
366 /**
367 * Returns the entity code for the given entity name.
368 * @param name entity name
369 * @return entity code or 0 for unknown entity names
370 */
371 public int entityCode(String name)
372 {
373
374 int c;
375
376 if (name.length() <= 1)
377 {
378 return 0;
379 }
380
381
382 if (name.charAt(1) == '#')
383 {
384 c = 0;
385
386
387 try
388 {
389 if (name.length() >= 4 && name.charAt(2) == 'x')
390 {
391 c = Integer.parseInt(name.substring(3), 16);
392 }
393 else if (name.length() >= 3)
394 {
395 c = Integer.parseInt(name.substring(2));
396 }
397 }
398 catch (NumberFormatException e)
399 {
400
401 }
402
403 return c;
404 }
405
406
407 Entity ent = lookup(name.substring(1));
408 if (ent != null)
409 {
410 return ent.getCode();
411 }
412
413 return 0;
414 }
415
416 /**
417 * Returns the entity name for the given entity code.
418 * @param code entity code
419 * @return entity name or null for unknown entity codes
420 */
421 public String entityName(short code)
422 {
423 String name = null;
424 Entity ent;
425 Iterator en = this.entityHashtable.values().iterator();
426 while (en.hasNext())
427 {
428 ent = (Entity) en.next();
429 if (ent.getCode() == code)
430 {
431 name = ent.getName();
432 break;
433 }
434 }
435 return name;
436 }
437
438 /**
439 * Returns the default entity table instance.
440 * @return entity table instance
441 */
442 public static EntityTable getDefaultEntityTable()
443 {
444 if (defaultEntityTable == null)
445 {
446 defaultEntityTable = new EntityTable();
447 for (int i = 0; i < entities.length; i++)
448 {
449 defaultEntityTable.install(entities[i]);
450 }
451 }
452 return defaultEntityTable;
453 }
454
455 }