View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  import java.util.Hashtable;
57  import java.util.Iterator;
58  import java.util.Map;
59  
60  
61  /**
62   * Entity hash table.
63   * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
64   * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
65   * @author Fabrizio Giustina
66   * @version $Revision: 779 $ ($Author: fgiust $)
67   */
68  public final class EntityTable
69  {
70  
71      /**
72       * the default entity table.
73       */
74      private static EntityTable defaultEntityTable;
75  
76      /**
77       * Known entities.
78       */
79      private static Entity[] entities = {
80          new Entity("nbsp", 160),
81          new Entity("iexcl", 161),
82          new Entity("cent", 162),
83          new Entity("pound", 163),
84          new Entity("curren", 164),
85          new Entity("yen", 165),
86          new Entity("brvbar", 166),
87          new Entity("sect", 167),
88          new Entity("uml", 168),
89          new Entity("copy", 169),
90          new Entity("ordf", 170),
91          new Entity("laquo", 171),
92          new Entity("not", 172),
93          new Entity("shy", 173),
94          new Entity("reg", 174),
95          new Entity("macr", 175),
96          new Entity("deg", 176),
97          new Entity("plusmn", 177),
98          new Entity("sup2", 178),
99          new Entity("sup3", 179),
100         new Entity("acute", 180),
101         new Entity("micro", 181),
102         new Entity("para", 182),
103         new Entity("middot", 183),
104         new Entity("cedil", 184),
105         new Entity("sup1", 185),
106         new Entity("ordm", 186),
107         new Entity("raquo", 187),
108         new Entity("frac14", 188),
109         new Entity("frac12", 189),
110         new Entity("frac34", 190),
111         new Entity("iquest", 191),
112         new Entity("Agrave", 192),
113         new Entity("Aacute", 193),
114         new Entity("Acirc", 194),
115         new Entity("Atilde", 195),
116         new Entity("Auml", 196),
117         new Entity("Aring", 197),
118         new Entity("AElig", 198),
119         new Entity("Ccedil", 199),
120         new Entity("Egrave", 200),
121         new Entity("Eacute", 201),
122         new Entity("Ecirc", 202),
123         new Entity("Euml", 203),
124         new Entity("Igrave", 204),
125         new Entity("Iacute", 205),
126         new Entity("Icirc", 206),
127         new Entity("Iuml", 207),
128         new Entity("ETH", 208),
129         new Entity("Ntilde", 209),
130         new Entity("Ograve", 210),
131         new Entity("Oacute", 211),
132         new Entity("Ocirc", 212),
133         new Entity("Otilde", 213),
134         new Entity("Ouml", 214),
135         new Entity("times", 215),
136         new Entity("Oslash", 216),
137         new Entity("Ugrave", 217),
138         new Entity("Uacute", 218),
139         new Entity("Ucirc", 219),
140         new Entity("Uuml", 220),
141         new Entity("Yacute", 221),
142         new Entity("THORN", 222),
143         new Entity("szlig", 223),
144         new Entity("agrave", 224),
145         new Entity("aacute", 225),
146         new Entity("acirc", 226),
147         new Entity("atilde", 227),
148         new Entity("auml", 228),
149         new Entity("aring", 229),
150         new Entity("aelig", 230),
151         new Entity("ccedil", 231),
152         new Entity("egrave", 232),
153         new Entity("eacute", 233),
154         new Entity("ecirc", 234),
155         new Entity("euml", 235),
156         new Entity("igrave", 236),
157         new Entity("iacute", 237),
158         new Entity("icirc", 238),
159         new Entity("iuml", 239),
160         new Entity("eth", 240),
161         new Entity("ntilde", 241),
162         new Entity("ograve", 242),
163         new Entity("oacute", 243),
164         new Entity("ocirc", 244),
165         new Entity("otilde", 245),
166         new Entity("ouml", 246),
167         new Entity("divide", 247),
168         new Entity("oslash", 248),
169         new Entity("ugrave", 249),
170         new Entity("uacute", 250),
171         new Entity("ucirc", 251),
172         new Entity("uuml", 252),
173         new Entity("yacute", 253),
174         new Entity("thorn", 254),
175         new Entity("yuml", 255),
176         new Entity("fnof", 402),
177         new Entity("Alpha", 913),
178         new Entity("Beta", 914),
179         new Entity("Gamma", 915),
180         new Entity("Delta", 916),
181         new Entity("Epsilon", 917),
182         new Entity("Zeta", 918),
183         new Entity("Eta", 919),
184         new Entity("Theta", 920),
185         new Entity("Iota", 921),
186         new Entity("Kappa", 922),
187         new Entity("Lambda", 923),
188         new Entity("Mu", 924),
189         new Entity("Nu", 925),
190         new Entity("Xi", 926),
191         new Entity("Omicron", 927),
192         new Entity("Pi", 928),
193         new Entity("Rho", 929),
194         new Entity("Sigma", 931),
195         new Entity("Tau", 932),
196         new Entity("Upsilon", 933),
197         new Entity("Phi", 934),
198         new Entity("Chi", 935),
199         new Entity("Psi", 936),
200         new Entity("Omega", 937),
201         new Entity("alpha", 945),
202         new Entity("beta", 946),
203         new Entity("gamma", 947),
204         new Entity("delta", 948),
205         new Entity("epsilon", 949),
206         new Entity("zeta", 950),
207         new Entity("eta", 951),
208         new Entity("theta", 952),
209         new Entity("iota", 953),
210         new Entity("kappa", 954),
211         new Entity("lambda", 955),
212         new Entity("mu", 956),
213         new Entity("nu", 957),
214         new Entity("xi", 958),
215         new Entity("omicron", 959),
216         new Entity("pi", 960),
217         new Entity("rho", 961),
218         new Entity("sigmaf", 962),
219         new Entity("sigma", 963),
220         new Entity("tau", 964),
221         new Entity("upsilon", 965),
222         new Entity("phi", 966),
223         new Entity("chi", 967),
224         new Entity("psi", 968),
225         new Entity("omega", 969),
226         new Entity("thetasym", 977),
227         new Entity("upsih", 978),
228         new Entity("piv", 982),
229         new Entity("bull", 8226),
230         new Entity("hellip", 8230),
231         new Entity("prime", 8242),
232         new Entity("Prime", 8243),
233         new Entity("oline", 8254),
234         new Entity("frasl", 8260),
235         new Entity("weierp", 8472),
236         new Entity("image", 8465),
237         new Entity("real", 8476),
238         new Entity("trade", 8482),
239         new Entity("alefsym", 8501),
240         new Entity("larr", 8592),
241         new Entity("uarr", 8593),
242         new Entity("rarr", 8594),
243         new Entity("darr", 8595),
244         new Entity("harr", 8596),
245         new Entity("crarr", 8629),
246         new Entity("lArr", 8656),
247         new Entity("uArr", 8657),
248         new Entity("rArr", 8658),
249         new Entity("dArr", 8659),
250         new Entity("hArr", 8660),
251         new Entity("forall", 8704),
252         new Entity("part", 8706),
253         new Entity("exist", 8707),
254         new Entity("empty", 8709),
255         new Entity("nabla", 8711),
256         new Entity("isin", 8712),
257         new Entity("notin", 8713),
258         new Entity("ni", 8715),
259         new Entity("prod", 8719),
260         new Entity("sum", 8721),
261         new Entity("minus", 8722),
262         new Entity("lowast", 8727),
263         new Entity("radic", 8730),
264         new Entity("prop", 8733),
265         new Entity("infin", 8734),
266         new Entity("ang", 8736),
267         new Entity("and", 8743),
268         new Entity("or", 8744),
269         new Entity("cap", 8745),
270         new Entity("cup", 8746),
271         new Entity("int", 8747),
272         new Entity("there4", 8756),
273         new Entity("sim", 8764),
274         new Entity("cong", 8773),
275         new Entity("asymp", 8776),
276         new Entity("ne", 8800),
277         new Entity("equiv", 8801),
278         new Entity("le", 8804),
279         new Entity("ge", 8805),
280         new Entity("sub", 8834),
281         new Entity("sup", 8835),
282         new Entity("nsub", 8836),
283         new Entity("sube", 8838),
284         new Entity("supe", 8839),
285         new Entity("oplus", 8853),
286         new Entity("otimes", 8855),
287         new Entity("perp", 8869),
288         new Entity("sdot", 8901),
289         new Entity("lceil", 8968),
290         new Entity("rceil", 8969),
291         new Entity("lfloor", 8970),
292         new Entity("rfloor", 8971),
293         new Entity("lang", 9001),
294         new Entity("rang", 9002),
295         new Entity("loz", 9674),
296         new Entity("spades", 9824),
297         new Entity("clubs", 9827),
298         new Entity("hearts", 9829),
299         new Entity("diams", 9830),
300         new Entity("quot", 34),
301         new Entity("amp", 38),
302         new Entity("lt", 60),
303         new Entity("gt", 62),
304         new Entity("OElig", 338),
305         new Entity("oelig", 339),
306         new Entity("Scaron", 352),
307         new Entity("scaron", 353),
308         new Entity("Yuml", 376),
309         new Entity("circ", 710),
310         new Entity("tilde", 732),
311         new Entity("ensp", 8194),
312         new Entity("emsp", 8195),
313         new Entity("thinsp", 8201),
314         new Entity("zwnj", 8204),
315         new Entity("zwj", 8205),
316         new Entity("lrm", 8206),
317         new Entity("rlm", 8207),
318         new Entity("ndash", 8211),
319         new Entity("mdash", 8212),
320         new Entity("lsquo", 8216),
321         new Entity("rsquo", 8217),
322         new Entity("sbquo", 8218),
323         new Entity("ldquo", 8220),
324         new Entity("rdquo", 8221),
325         new Entity("bdquo", 8222),
326         new Entity("dagger", 8224),
327         new Entity("Dagger", 8225),
328         new Entity("permil", 8240),
329         new Entity("lsaquo", 8249),
330         new Entity("rsaquo", 8250),
331         new Entity("euro", 8364)};
332 
333     /**
334      * Entity map.
335      */
336     private Map entityHashtable = new Hashtable();
337 
338     /**
339      * use getDefaultEntityTable to get an entity table instance.
340      */
341     private EntityTable()
342     {
343         super();
344     }
345 
346     /**
347      * installs an entity.
348      * @param ent entity
349      * @return installed Entity
350      */
351     private Entity install(Entity ent)
352     {
353         return (Entity) this.entityHashtable.put(ent.getName(), ent);
354     }
355 
356     /**
357      * Lookup an entity by its name.
358      * @param name entity name
359      * @return entity
360      */
361     public Entity lookup(String name)
362     {
363         return (Entity) this.entityHashtable.get(name);
364     }
365 
366     /**
367      * Returns the entity code for the given entity name.
368      * @param name entity name
369      * @return entity code or 0 for unknown entity names
370      */
371     public int entityCode(String name)
372     {
373         // entity starting with "&" returns zero on error.
374         int c;
375 
376         if (name.length() <= 1)
377         {
378             return 0;
379         }
380 
381         // numeric entitity: name = "&#" followed by number
382         if (name.charAt(1) == '#')
383         {
384             c = 0; // zero on missing/bad number
385 
386             // 'x' prefix denotes hexadecimal number format
387             try
388             {
389                 if (name.length() >= 4 && name.charAt(2) == 'x')
390                 {
391                     c = Integer.parseInt(name.substring(3), 16);
392                 }
393                 else if (name.length() >= 3)
394                 {
395                     c = Integer.parseInt(name.substring(2));
396                 }
397             }
398             catch (NumberFormatException e)
399             {
400                 // ignore
401             }
402 
403             return c;
404         }
405 
406         // Named entity: name ="&" followed by a name
407         Entity ent = lookup(name.substring(1));
408         if (ent != null)
409         {
410             return ent.getCode();
411         }
412 
413         return 0; // zero signifies unknown entity name
414     }
415 
416     /**
417      * Returns the entity name for the given entity code.
418      * @param code entity code
419      * @return entity name or null for unknown entity codes
420      */
421     public String entityName(short code)
422     {
423         String name = null;
424         Entity ent;
425         Iterator en = this.entityHashtable.values().iterator();
426         while (en.hasNext())
427         {
428             ent = (Entity) en.next();
429             if (ent.getCode() == code)
430             {
431                 name = ent.getName();
432                 break;
433             }
434         }
435         return name;
436     }
437 
438     /**
439      * Returns the default entity table instance.
440      * @return entity table instance
441      */
442     public static EntityTable getDefaultEntityTable()
443     {
444         if (defaultEntityTable == null)
445         {
446             defaultEntityTable = new EntityTable();
447             for (int i = 0; i < entities.length; i++)
448             {
449                 defaultEntityTable.install(entities[i]);
450             }
451         }
452         return defaultEntityTable;
453     }
454 
455 }