View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  import java.util.ArrayList;
57  import java.util.Hashtable;
58  import java.util.Iterator;
59  import java.util.List;
60  import java.util.Map;
61  
62  
63  /**
64   * Tag dictionary node hash table.
65   * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
66   * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
67   * @author Fabrizio Giustina
68   * @version $Revision: 779 $ ($Author: fgiust $)
69   */
70  public final class TagTable
71  {
72  
73      /**
74       * dummy entry for all xml tags.
75       */
76      public static final Dict XML_TAGS = new Dict(null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null);
77  
78      /**
79       * all the known tags.
80       */
81      private static final Dict[] TAGS = {
82          new Dict(
83              "html",
84              Dict.VERS_ALL,
85              (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST),
86              ParserImpl.HTML,
87              TagCheckImpl.HTML),
88          new Dict("head", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST), ParserImpl.HEAD, null),
89          new Dict("title", Dict.VERS_ALL, Dict.CM_HEAD, ParserImpl.TITLE, null),
90          new Dict("base", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
91          new Dict("link", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, TagCheckImpl.LINK),
92          new Dict("meta", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, TagCheckImpl.META),
93          new Dict(
94              "style",
95              (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
96              Dict.CM_HEAD,
97              ParserImpl.SCRIPT,
98              TagCheckImpl.STYLE),
99          new Dict(
100             "script",
101             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
102             (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
103             ParserImpl.SCRIPT,
104             TagCheckImpl.SCRIPT),
105         new Dict(
106             "server",
107             Dict.VERS_NETSCAPE,
108             (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
109             ParserImpl.SCRIPT,
110             null),
111         new Dict("body", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST), ParserImpl.BODY, null),
112         new Dict("frameset", Dict.VERS_FRAMESET, (Dict.CM_HTML | Dict.CM_FRAMES), ParserImpl.FRAMESET, null),
113         new Dict("p", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OPT), ParserImpl.INLINE, null),
114         new Dict("h1", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
115         new Dict("h2", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
116         new Dict("h3", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
117         new Dict("h4", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
118         new Dict("h5", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
119         new Dict("h6", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
120         new Dict("ul", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
121         new Dict("ol", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
122         new Dict("dl", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.DEFLIST, null),
123         new Dict("dir", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.LIST, null),
124         new Dict("menu", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.LIST, null),
125         new Dict("pre", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.PRE, null),
126         new Dict("listing", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
127         new Dict("xmp", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
128         new Dict("plaintext", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
129         new Dict("address", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
130         new Dict("blockquote", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
131         new Dict("form", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, TagCheckImpl.FORM),
132         new Dict("isindex", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
133         new Dict("fieldset", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_BLOCK, ParserImpl.BLOCK, null),
134         new Dict("table", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.TABLETAG, TagCheckImpl.TABLE),
135         new Dict(
136             "hr",
137             (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
138             (Dict.CM_BLOCK | Dict.CM_EMPTY),
139             ParserImpl.EMPTY,
140             TagCheckImpl.HR),
141         new Dict("div", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
142         new Dict("multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
143         new Dict("nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
144         new Dict("layer", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
145         new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
146         new Dict(
147             "nolayer",
148             Dict.VERS_NETSCAPE,
149             (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
150             ParserImpl.BLOCK,
151             null),
152         new Dict("align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
153         new Dict("center", Dict.VERS_LOOSE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
154         new Dict(
155             "ins",
156             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
157             (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
158             ParserImpl.INLINE,
159             null),
160         new Dict(
161             "del",
162             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
163             (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
164             ParserImpl.INLINE,
165             null),
166         new Dict("li", Dict.VERS_ALL, (Dict.CM_LIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
167         new Dict("dt", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.INLINE, null),
168         new Dict("dd", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
169         new Dict("caption", Dict.VERS_FROM32, Dict.CM_TABLE, ParserImpl.INLINE, TagCheckImpl.CAPTION),
170         new Dict("colgroup", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.COLGROUP, null),
171         new Dict("col", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
172         new Dict(
173             "thead",
174             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
175             (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
176             ParserImpl.ROWGROUP,
177             null),
178         new Dict(
179             "tfoot",
180             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
181             (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
182             ParserImpl.ROWGROUP,
183             null),
184         new Dict(
185             "tbody",
186             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
187             (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
188             ParserImpl.ROWGROUP,
189             null),
190         new Dict("tr", Dict.VERS_FROM32, (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.ROW, null),
191         new Dict(
192             "td",
193             Dict.VERS_FROM32,
194             (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
195             ParserImpl.BLOCK,
196             TagCheckImpl.TABLECELL),
197         new Dict(
198             "th",
199             Dict.VERS_FROM32,
200             (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
201             ParserImpl.BLOCK,
202             TagCheckImpl.TABLECELL),
203         new Dict("q", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
204         new Dict("a", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, TagCheckImpl.ANCHOR),
205         new Dict("br", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
206         new Dict(
207             "img",
208             Dict.VERS_ALL,
209             (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY),
210             ParserImpl.EMPTY,
211             TagCheckImpl.IMG),
212         new Dict(
213             "object",
214             Dict.VERS_HTML40,
215             (Dict.CM_OBJECT | Dict.CM_HEAD | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
216             ParserImpl.BLOCK,
217             null),
218         new Dict(
219             "applet",
220             Dict.VERS_LOOSE,
221             (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
222             ParserImpl.BLOCK,
223             null),
224         new Dict(
225             "servlet",
226             Dict.VERS_SUN,
227             (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
228             ParserImpl.BLOCK,
229             null),
230         new Dict("param", Dict.VERS_FROM32, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
231         new Dict("embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
232         new Dict("noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
233         new Dict("iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE, ParserImpl.BLOCK, null),
234         new Dict("frame", Dict.VERS_FRAMESET, (Dict.CM_FRAMES | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
235         new Dict("noframes", Dict.VERS_IFRAME, (Dict.CM_BLOCK | Dict.CM_FRAMES), ParserImpl.NOFRAMES, null),
236         new Dict(
237             "noscript",
238             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
239             (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
240             ParserImpl.BLOCK,
241             null),
242         new Dict("b", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
243         new Dict("i", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
244         new Dict("u", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
245         new Dict("tt", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
246         new Dict("s", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
247         new Dict("strike", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
248         new Dict("big", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
249         new Dict("small", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
250         new Dict("sub", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
251         new Dict("sup", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
252         new Dict("em", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
253         new Dict("strong", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
254         new Dict("dfn", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
255         new Dict("code", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
256         new Dict("samp", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
257         new Dict("kbd", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
258         new Dict("var", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
259         new Dict("cite", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
260         new Dict("abbr", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
261         new Dict("acronym", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
262         new Dict("span", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.INLINE, null),
263         new Dict("blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.INLINE, null),
264         new Dict("nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.INLINE, null),
265         new Dict("wbr", Dict.VERS_PROPRIETARY, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
266         new Dict("marquee", Dict.VERS_MICROSOFT, (Dict.CM_INLINE | Dict.CM_OPT), ParserImpl.INLINE, null),
267         new Dict("bgsound", Dict.VERS_MICROSOFT, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
268         new Dict("comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE, ParserImpl.INLINE, null),
269         new Dict("spacer", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
270         new Dict("keygen", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
271         new Dict(
272             "nolayer",
273             Dict.VERS_NETSCAPE,
274             (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
275             ParserImpl.BLOCK,
276             null),
277         new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
278         new Dict(
279             "map",
280             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
281             Dict.CM_INLINE,
282             ParserImpl.BLOCK,
283             TagCheckImpl.MAP),
284         new Dict(
285             "area",
286             (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
287             (Dict.CM_BLOCK | Dict.CM_EMPTY),
288             ParserImpl.EMPTY,
289             TagCheckImpl.AREA),
290         new Dict("input", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
291         new Dict("select", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD), ParserImpl.SELECT, null),
292         new Dict("option", Dict.VERS_ALL, (Dict.CM_FIELD | Dict.CM_OPT), ParserImpl.TEXT, null),
293         new Dict(
294             "optgroup",
295             (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
296             (Dict.CM_FIELD | Dict.CM_OPT),
297             ParserImpl.OPTGROUP,
298             null),
299         new Dict("textarea", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD), ParserImpl.TEXT, null),
300         new Dict("label", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
301         new Dict("legend", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
302         new Dict("button", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
303         new Dict("basefont", Dict.VERS_LOOSE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
304         new Dict("font", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
305         new Dict("bdo", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
306         // elements for XHTML 1.1
307         new Dict("ruby", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
308         new Dict("rbc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
309         new Dict("rtc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
310         new Dict("rb", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
311         new Dict("rt", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
312         new Dict("", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
313         new Dict("rp", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
314     //
315     };
316 
317     /**
318      * html tag.
319      */
320     protected Dict tagHtml;
321 
322     /**
323      * head tag.
324      */
325     protected Dict tagHead;
326 
327     /**
328      * body tag.
329      */
330     protected Dict tagBody;
331 
332     /**
333      * frameset tag.
334      */
335     protected Dict tagFrameset;
336 
337     /**
338      * frame tag.
339      */
340     protected Dict tagFrame;
341 
342     /**
343      * iframe tag.
344      */
345     protected Dict tagIframe;
346 
347     /**
348      * noframes tag.
349      */
350     protected Dict tagNoframes;
351 
352     /**
353      * meta tag.
354      */
355     protected Dict tagMeta;
356 
357     /**
358      * title tag.
359      */
360     protected Dict tagTitle;
361 
362     /**
363      * base tag.
364      */
365     protected Dict tagBase;
366 
367     /**
368      * hr tag.
369      */
370     protected Dict tagHr;
371 
372     /**
373      * pre tag.
374      */
375     protected Dict tagPre;
376 
377     /**
378      * listing tag.
379      */
380     protected Dict tagListing;
381 
382     /**
383      * h1 tag.
384      */
385     protected Dict tagH1;
386 
387     /**
388      * h2 tag.
389      */
390     protected Dict tagH2;
391 
392     /**
393      * p tag.
394      */
395     protected Dict tagP;
396 
397     /**
398      * ul tag.
399      */
400     protected Dict tagUl;
401 
402     /**
403      * ol tag.
404      */
405     protected Dict tagOl;
406 
407     /**
408      * dir tag.
409      */
410     protected Dict tagDir;
411 
412     /**
413      * li tag.
414      */
415     protected Dict tagLi;
416 
417     /**
418      * dt tag.
419      */
420     protected Dict tagDt;
421 
422     /**
423      * dd tag.
424      */
425     protected Dict tagDd;
426 
427     /**
428      * dl tag.
429      */
430     protected Dict tagDl;
431 
432     /**
433      * td tag.
434      */
435     protected Dict tagTd;
436 
437     /**
438      * th tag.
439      */
440     protected Dict tagTh;
441 
442     /**
443      * tr tag.
444      */
445     protected Dict tagTr;
446 
447     /**
448      * col tag.
449      */
450     protected Dict tagCol;
451 
452     /**
453      * colgroup tag.
454      */
455     protected Dict tagColgroup;
456 
457     /**
458      * br tag.
459      */
460     protected Dict tagBr;
461 
462     /**
463      * a tag.
464      */
465     protected Dict tagA;
466 
467     /**
468      * link tag.
469      */
470     protected Dict tagLink;
471 
472     /**
473      * b tag.
474      */
475     protected Dict tagB;
476 
477     /**
478      * i tag.
479      */
480     protected Dict tagI;
481 
482     /**
483      * strong tag.
484      */
485     protected Dict tagStrong;
486 
487     /**
488      * em tag.
489      */
490     protected Dict tagEm;
491 
492     /**
493      * big tag.
494      */
495     protected Dict tagBig;
496 
497     /**
498      * small tag.
499      */
500     protected Dict tagSmall;
501 
502     /**
503      * param tag.
504      */
505     protected Dict tagParam;
506 
507     /**
508      * option tag.
509      */
510     protected Dict tagOption;
511 
512     /**
513      * optgroup tag.
514      */
515     protected Dict tagOptgroup;
516 
517     /**
518      * img tag.
519      */
520     protected Dict tagImg;
521 
522     /**
523      * map tag.
524      */
525     protected Dict tagMap;
526 
527     /**
528      * area tag.
529      */
530     protected Dict tagArea;
531 
532     /**
533      * nobr tag.
534      */
535     protected Dict tagNobr;
536 
537     /**
538      * wbr tag.
539      */
540     protected Dict tagWbr;
541 
542     /**
543      * font tag.
544      */
545     protected Dict tagFont;
546 
547     /**
548      * spacer tag.
549      */
550     protected Dict tagSpacer;
551 
552     /**
553      * layer tag.
554      */
555     protected Dict tagLayer;
556 
557     /**
558      * center tag.
559      */
560     protected Dict tagCenter;
561 
562     /**
563      * style tag.
564      */
565     protected Dict tagStyle;
566 
567     /**
568      * script tag.
569      */
570     protected Dict tagScript;
571 
572     /**
573      * noscript tag.
574      */
575     protected Dict tagNoscript;
576 
577     /**
578      * table tag.
579      */
580     protected Dict tagTable;
581 
582     /**
583      * caption tag.
584      */
585     protected Dict tagCaption;
586 
587     /**
588      * form tag.
589      */
590     protected Dict tagForm;
591 
592     /**
593      * textarea tag.
594      */
595     protected Dict tagTextarea;
596 
597     /**
598      * blockquote tag.
599      */
600     protected Dict tagBlockquote;
601 
602     /**
603      * applet tag.
604      */
605     protected Dict tagApplet;
606 
607     /**
608      * object tag.
609      */
610     protected Dict tagObject;
611 
612     /**
613      * div tag.
614      */
615     protected Dict tagDiv;
616 
617     /**
618      * span tag.
619      */
620     protected Dict tagSpan;
621 
622     /**
623      * input tag.
624      */
625     protected Dict tagInput;
626 
627     /**
628      * tag.
629      */
630     protected Dict tagQ;
631 
632     /**
633      * a proprietary tag added by Tidy, along with tag_nobr, tag_wbr.
634      */
635     protected Dict tagBlink;
636 
637     /**
638      * anchor/node hash.
639      */
640     protected Anchor anchorList;
641 
642     /**
643      * configuration.
644      */
645     private Configuration configuration;
646 
647     /**
648      * hashTable containing tags.
649      */
650     private Map tagHashtable = new Hashtable();
651 
652     /**
653      * Instantiates a new tag table with known tags.
654      */
655     protected TagTable()
656     {
657         for (int i = 0; i < TAGS.length; i++)
658         {
659             install(TAGS[i]);
660         }
661         tagHtml = lookup("html");
662         tagHead = lookup("head");
663         tagBody = lookup("body");
664         tagFrameset = lookup("frameset");
665         tagFrame = lookup("frame");
666         tagIframe = lookup("iframe");
667         tagNoframes = lookup("noframes");
668         tagMeta = lookup("meta");
669         tagTitle = lookup("title");
670         tagBase = lookup("base");
671         tagHr = lookup("hr");
672         tagPre = lookup("pre");
673         tagListing = lookup("listing");
674         tagH1 = lookup("h1");
675         tagH2 = lookup("h2");
676         tagP = lookup("p");
677         tagUl = lookup("ul");
678         tagOl = lookup("ol");
679         tagDir = lookup("dir");
680         tagLi = lookup("li");
681         tagDt = lookup("dt");
682         tagDd = lookup("dd");
683         tagDl = lookup("dl");
684         tagTd = lookup("td");
685         tagTh = lookup("th");
686         tagTr = lookup("tr");
687         tagCol = lookup("col");
688         tagColgroup = lookup("colgroup");
689         tagBr = lookup("br");
690         tagA = lookup("a");
691         tagLink = lookup("link");
692         tagB = lookup("b");
693         tagI = lookup("i");
694         tagStrong = lookup("strong");
695         tagEm = lookup("em");
696         tagBig = lookup("big");
697         tagSmall = lookup("small");
698         tagParam = lookup("param");
699         tagOption = lookup("option");
700         tagOptgroup = lookup("optgroup");
701         tagImg = lookup("img");
702         tagMap = lookup("map");
703         tagArea = lookup("area");
704         tagNobr = lookup("nobr");
705         tagWbr = lookup("wbr");
706         tagFont = lookup("font");
707         tagSpacer = lookup("spacer");
708         tagLayer = lookup("layer");
709         tagCenter = lookup("center");
710         tagStyle = lookup("style");
711         tagScript = lookup("script");
712         tagNoscript = lookup("noscript");
713         tagTable = lookup("table");
714         tagCaption = lookup("caption");
715         tagForm = lookup("form");
716         tagTextarea = lookup("textarea");
717         tagBlockquote = lookup("blockquote");
718         tagApplet = lookup("applet");
719         tagObject = lookup("object");
720         tagDiv = lookup("div");
721         tagSpan = lookup("span");
722         tagInput = lookup("input");
723         tagQ = lookup("q");
724         tagBlink = lookup("blink");
725     }
726 
727     /**
728      * Setter for the current configuration instance.
729      * @param configuration configuration instance
730      */
731     public void setConfiguration(Configuration configuration)
732     {
733         this.configuration = configuration;
734     }
735 
736     /**
737      * Lookup a tag definition by its name.
738      * @param name tag name
739      * @return tag definition (Dict)
740      */
741     public Dict lookup(String name)
742     {
743         return (Dict) tagHashtable.get(name);
744     }
745 
746     /**
747      * Installs a new tag in the tag table, or modify an existing one.
748      * @param dict tag definition
749      * @return installed Dict instance
750      */
751     public Dict install(Dict dict)
752     {
753         Dict d = (Dict) tagHashtable.get(dict.name);
754         if (d != null)
755         {
756             d.versions = dict.versions;
757             d.model |= dict.model;
758             d.setParser(dict.getParser());
759             d.setChkattrs(dict.getChkattrs());
760             return d;
761         }
762 
763         tagHashtable.put(dict.name, dict);
764         return dict;
765 
766     }
767 
768     /**
769      * Finds a tag by name.
770      * @param node Node to find. If the element is found the tag property of node will be set.
771      * @return true if the tag is found, false otherwise
772      */
773     public boolean findTag(Node node)
774     {
775         Dict np;
776 
777         if (configuration != null && configuration.xmlTags)
778         {
779             node.tag = XML_TAGS;
780             return true;
781         }
782 
783         if (node.element != null)
784         {
785             np = lookup(node.element);
786             if (np != null)
787             {
788                 node.tag = np;
789                 return true;
790             }
791         }
792 
793         return false;
794     }
795 
796     /**
797      * Finds a parser fo the given node.
798      * @param node Node
799      * @return parser for the node
800      */
801     public Parser findParser(Node node)
802     {
803         Dict np;
804 
805         if (node.element != null)
806         {
807             np = lookup(node.element);
808             if (np != null)
809             {
810                 return np.getParser();
811             }
812         }
813 
814         return null;
815     }
816 
817     /**
818      * May id or name serve as anchor?
819      * @param node Node
820      * @return <code>true</code> if tag can serve as an anchor
821      */
822     boolean isAnchorElement(Node node)
823     {
824         return node.tag == this.tagA
825             || node.tag == this.tagApplet
826             || node.tag == this.tagForm
827             || node.tag == this.tagFrame
828             || node.tag == this.tagIframe
829             || node.tag == this.tagImg
830             || node.tag == this.tagMap;
831     }
832 
833     /**
834      * Defines a new tag.
835      * @param tagType tag type. Can be TAGTYPE_BLOCK | TAGTYPE_EMPTY | TAGTYPE_PRE | TAGTYPE_INLINE
836      * @param name tag name
837      */
838     public void defineTag(short tagType, String name)
839     {
840         Parser tagParser;
841         short model;
842 
843         switch (tagType)
844         {
845             case Dict.TAGTYPE_BLOCK :
846                 model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
847                 tagParser = ParserImpl.BLOCK;
848                 break;
849 
850             case Dict.TAGTYPE_EMPTY :
851                 model = (short) (Dict.CM_EMPTY | Dict.CM_NO_INDENT | Dict.CM_NEW);
852                 tagParser = ParserImpl.BLOCK;
853                 break;
854 
855             case Dict.TAGTYPE_PRE :
856                 model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
857                 tagParser = ParserImpl.PRE;
858                 break;
859 
860             case Dict.TAGTYPE_INLINE :
861             default :
862                 // default to inline tag
863                 model = (short) (Dict.CM_INLINE | Dict.CM_NO_INDENT | Dict.CM_NEW);
864                 tagParser = ParserImpl.INLINE;
865                 break;
866         }
867 
868         install(new Dict(name, Dict.VERS_PROPRIETARY, model, tagParser, null));
869     }
870 
871     /**
872      * return a List containing all the user-defined tag names.
873      * @param tagType one of Dict.TAGTYPE_EMPTY | Dict.TAGTYPE_INLINE | Dict.TAGTYPE_BLOCK | Dict.TAGTYPE_PRE
874      * @return List containing all the user-defined tag names
875      */
876     List findAllDefinedTag(short tagType)
877     {
878         List tagNames = new ArrayList();
879 
880         Iterator iterator = tagHashtable.values().iterator();
881         while (iterator.hasNext())
882         {
883             Dict curDictEntry = (Dict) iterator.next();
884 
885             if (curDictEntry != null)
886             {
887                 switch (tagType)
888                 {
889                     // defined tags can be empty + inline
890                     case Dict.TAGTYPE_EMPTY :
891                         if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
892                             && ((curDictEntry.model & Dict.CM_EMPTY) == Dict.CM_EMPTY)
893                             && // (curDictEntry.parser == ParseBlock) &&
894                             (curDictEntry != tagWbr))
895                         {
896                             tagNames.add(curDictEntry.name);
897                         }
898                         break;
899 
900                     // defined tags can be empty + inline
901                     case Dict.TAGTYPE_INLINE :
902                         if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
903                             && ((curDictEntry.model & Dict.CM_INLINE) == Dict.CM_INLINE)
904                             && // (curDictEntry.parser == ParseInline) &&
905                             (curDictEntry != tagBlink)
906                             && (curDictEntry != tagNobr)
907                             && (curDictEntry != tagWbr))
908                         {
909                             tagNames.add(curDictEntry.name);
910                         }
911                         break;
912 
913                     // defined tags can be empty + block
914                     case Dict.TAGTYPE_BLOCK :
915                         if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
916                             && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
917                             && (curDictEntry.getParser() == ParserImpl.BLOCK))
918                         {
919                             tagNames.add(curDictEntry.name);
920                         }
921                         break;
922 
923                     case Dict.TAGTYPE_PRE :
924                         if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
925                             && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
926                             && (curDictEntry.getParser() == ParserImpl.PRE))
927                         {
928                             tagNames.add(curDictEntry.name);
929                         }
930                         break;
931                 }
932             }
933         }
934 
935         return tagNames;
936     }
937 
938     /**
939      * Free node's attributes.
940      * @param node Node
941      */
942     public void freeAttrs(Node node)
943     {
944         while (node.attributes != null)
945         {
946             AttVal av = node.attributes;
947             if ("id".equalsIgnoreCase(av.attribute) || "name".equalsIgnoreCase(av.attribute) && isAnchorElement(node))
948             {
949                 removeAnchorByNode(node);
950             }
951 
952             node.attributes = av.next;
953         }
954     }
955 
956     /**
957      * Removes anchor for specific node.
958      * @param node Node
959      */
960     void removeAnchorByNode(Node node)
961     {
962         Anchor delme = null;
963         Anchor found = null;
964         Anchor prev = null;
965         Anchor next = null;
966 
967         for (found = anchorList; found != null; found = found.next)
968         {
969             next = found.next;
970 
971             if (found.node == node)
972             {
973                 if (prev != null)
974                 {
975                     prev.next = next;
976                 }
977                 else
978                 {
979                     anchorList = next;
980                 }
981 
982                 delme = found;
983             }
984             else
985             {
986                 prev = found;
987             }
988         }
989         if (delme != null)
990         {
991             delme = null; // freeAnchor
992         }
993     }
994 
995     /**
996      * Initialize a new anchor.
997      * @return a new anchor element
998      */
999     Anchor newAnchor()
1000     {
1001         Anchor a = new Anchor();
1002         return a;
1003     }
1004 
1005     /**
1006      * Adds a new anchor to namespace.
1007      * @param name anchor name
1008      * @param node destination for this anchor
1009      * @return Anchor
1010      */
1011     Anchor addAnchor(String name, Node node)
1012     {
1013         Anchor a = newAnchor();
1014 
1015         a.name = name;
1016         a.node = node;
1017 
1018         if (anchorList == null)
1019         {
1020             anchorList = a;
1021         }
1022         else
1023         {
1024             Anchor here = anchorList;
1025 
1026             while (here.next != null)
1027             {
1028                 here = here.next;
1029             }
1030             here.next = a;
1031         }
1032 
1033         return anchorList;
1034     }
1035 
1036     /**
1037      * Return node associated with anchor.
1038      * @param name anchor name
1039      * @return node associated with anchor
1040      */
1041     Node getNodeByAnchor(String name)
1042     {
1043         Anchor found;
1044 
1045         for (found = anchorList; found != null; found = found.next)
1046         {
1047             if (name.equalsIgnoreCase(found.name))
1048             {
1049                 break;
1050             }
1051         }
1052 
1053         if (found != null)
1054         {
1055             return found.node;
1056         }
1057 
1058         return null;
1059     }
1060 
1061     /**
1062      * free all anchors.
1063      */
1064     void freeAnchors()
1065     {
1066         anchorList = null;
1067     }
1068 
1069 }