1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 import java.util.ArrayList;
57 import java.util.Hashtable;
58 import java.util.Iterator;
59 import java.util.List;
60 import java.util.Map;
61
62
63 /**
64 * Tag dictionary node hash table.
65 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
66 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
67 * @author Fabrizio Giustina
68 * @version $Revision: 779 $ ($Author: fgiust $)
69 */
70 public final class TagTable
71 {
72
73 /**
74 * Dummy entry shared by all xml tags: findTag assigns it to every node when the configuration's
* xmlTags flag is set, instead of looking the element name up in the table.
75 */
76 public static final Dict XML_TAGS = new Dict(null, Dict.VERS_ALL, Dict.CM_BLOCK, null, null);
77
78 /**
79 * All the known (built-in) tags. The constructor installs every entry of this array into the
* instance's tag hashtable. Each entry is created as
* <code>new Dict(name, versions, content model, parser, tag check or null)</code>.
* <p>
* NOTE(review): the array is static, so the Dict instances appear to be shared by every TagTable
* instance; install() updates an existing entry in place when a name is installed twice - confirm
* whether cross-instance sharing is intended.
* </p>
80 */
81 private static final Dict[] TAGS = {
82 new Dict(
83 "html",
84 Dict.VERS_ALL,
85 (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST),
86 ParserImpl.HTML,
87 TagCheckImpl.HTML),
88 new Dict("head", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST), ParserImpl.HEAD, null),
89 new Dict("title", Dict.VERS_ALL, Dict.CM_HEAD, ParserImpl.TITLE, null),
90 new Dict("base", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
91 new Dict("link", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, TagCheckImpl.LINK),
92 new Dict("meta", Dict.VERS_ALL, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, TagCheckImpl.META),
93 new Dict(
94 "style",
95 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
96 Dict.CM_HEAD,
97 ParserImpl.SCRIPT,
98 TagCheckImpl.STYLE),
99 new Dict(
100 "script",
101 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
102 (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
103 ParserImpl.SCRIPT,
104 TagCheckImpl.SCRIPT),
105 new Dict(
106 "server",
107 Dict.VERS_NETSCAPE,
108 (Dict.CM_HEAD | Dict.CM_MIXED | Dict.CM_BLOCK | Dict.CM_INLINE),
109 ParserImpl.SCRIPT,
110 null),
111 new Dict("body", Dict.VERS_ALL, (Dict.CM_HTML | Dict.CM_OPT | Dict.CM_OMITST), ParserImpl.BODY, null),
112 new Dict("frameset", Dict.VERS_FRAMESET, (Dict.CM_HTML | Dict.CM_FRAMES), ParserImpl.FRAMESET, null),
113 new Dict("p", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OPT), ParserImpl.INLINE, null),
114 new Dict("h1", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
115 new Dict("h2", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
116 new Dict("h3", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
117 new Dict("h4", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
118 new Dict("h5", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
119 new Dict("h6", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_HEADING), ParserImpl.INLINE, null),
120 new Dict("ul", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
121 new Dict("ol", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.LIST, null),
122 new Dict("dl", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.DEFLIST, null),
123 new Dict("dir", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.LIST, null),
124 new Dict("menu", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.LIST, null),
125 new Dict("pre", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.PRE, null),
126 new Dict("listing", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
127 new Dict("xmp", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
128 new Dict("plaintext", Dict.VERS_ALL, (Dict.CM_BLOCK | Dict.CM_OBSOLETE), ParserImpl.PRE, null),
129 new Dict("address", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
130 new Dict("blockquote", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
131 new Dict("form", Dict.VERS_ALL, Dict.CM_BLOCK, ParserImpl.BLOCK, TagCheckImpl.FORM),
132 new Dict("isindex", Dict.VERS_LOOSE, (Dict.CM_BLOCK | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
133 new Dict("fieldset", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_BLOCK, ParserImpl.BLOCK, null),
134 new Dict("table", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.TABLETAG, TagCheckImpl.TABLE),
135 new Dict(
136 "hr",
137 (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
138 (Dict.CM_BLOCK | Dict.CM_EMPTY),
139 ParserImpl.EMPTY,
140 TagCheckImpl.HR),
141 new Dict("div", Dict.VERS_FROM32, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
142 new Dict("multicol", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
143 new Dict("nosave", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
144 new Dict("layer", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
145 new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
146 new Dict(
147 "nolayer",
148 Dict.VERS_NETSCAPE,
149 (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
150 ParserImpl.BLOCK,
151 null),
152 new Dict("align", Dict.VERS_NETSCAPE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
153 new Dict("center", Dict.VERS_LOOSE, Dict.CM_BLOCK, ParserImpl.BLOCK, null),
154 new Dict(
155 "ins",
156 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
157 (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
158 ParserImpl.INLINE,
159 null),
160 new Dict(
161 "del",
162 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
163 (Dict.CM_INLINE | Dict.CM_BLOCK | Dict.CM_MIXED),
164 ParserImpl.INLINE,
165 null),
166 new Dict("li", Dict.VERS_ALL, (Dict.CM_LIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
167 new Dict("dt", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.INLINE, null),
168 new Dict("dd", Dict.VERS_ALL, (Dict.CM_DEFLIST | Dict.CM_OPT | Dict.CM_NO_INDENT), ParserImpl.BLOCK, null),
169 new Dict("caption", Dict.VERS_FROM32, Dict.CM_TABLE, ParserImpl.INLINE, TagCheckImpl.CAPTION),
170 new Dict("colgroup", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.COLGROUP, null),
171 new Dict("col", Dict.VERS_HTML40, (Dict.CM_TABLE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
172 new Dict(
173 "thead",
174 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
175 (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
176 ParserImpl.ROWGROUP,
177 null),
178 new Dict(
179 "tfoot",
180 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
181 (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
182 ParserImpl.ROWGROUP,
183 null),
184 new Dict(
185 "tbody",
186 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
187 (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_OPT),
188 ParserImpl.ROWGROUP,
189 null),
190 new Dict("tr", Dict.VERS_FROM32, (Dict.CM_TABLE | Dict.CM_OPT), ParserImpl.ROW, null),
191 new Dict(
192 "td",
193 Dict.VERS_FROM32,
194 (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
195 ParserImpl.BLOCK,
196 TagCheckImpl.TABLECELL),
197 new Dict(
198 "th",
199 Dict.VERS_FROM32,
200 (Dict.CM_ROW | Dict.CM_OPT | Dict.CM_NO_INDENT),
201 ParserImpl.BLOCK,
202 TagCheckImpl.TABLECELL),
203 new Dict("q", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
204 new Dict("a", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, TagCheckImpl.ANCHOR),
205 new Dict("br", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
206 new Dict(
207 "img",
208 Dict.VERS_ALL,
209 (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY),
210 ParserImpl.EMPTY,
211 TagCheckImpl.IMG),
212 new Dict(
213 "object",
214 Dict.VERS_HTML40,
215 (Dict.CM_OBJECT | Dict.CM_HEAD | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
216 ParserImpl.BLOCK,
217 null),
218 new Dict(
219 "applet",
220 Dict.VERS_LOOSE,
221 (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
222 ParserImpl.BLOCK,
223 null),
224 new Dict(
225 "servlet",
226 Dict.VERS_SUN,
227 (Dict.CM_OBJECT | Dict.CM_IMG | Dict.CM_INLINE | Dict.CM_PARAM),
228 ParserImpl.BLOCK,
229 null),
230 new Dict("param", Dict.VERS_FROM32, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
231 new Dict("embed", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
232 new Dict("noembed", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
233 new Dict("iframe", Dict.VERS_HTML40_LOOSE, Dict.CM_INLINE, ParserImpl.BLOCK, null),
234 new Dict("frame", Dict.VERS_FRAMESET, (Dict.CM_FRAMES | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
235 new Dict("noframes", Dict.VERS_IFRAME, (Dict.CM_BLOCK | Dict.CM_FRAMES), ParserImpl.NOFRAMES, null),
236 new Dict(
237 "noscript",
238 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
239 (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
240 ParserImpl.BLOCK,
241 null),
242 new Dict("b", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
243 new Dict("i", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
244 new Dict("u", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
245 new Dict("tt", (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
246 new Dict("s", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
247 new Dict("strike", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
248 new Dict("big", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
249 new Dict("small", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
250 new Dict("sub", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
251 new Dict("sup", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
252 new Dict("em", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
253 new Dict("strong", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
254 new Dict("dfn", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
255 new Dict("code", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
256 new Dict("samp", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
257 new Dict("kbd", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
258 new Dict("var", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
259 new Dict("cite", Dict.VERS_ALL, Dict.CM_INLINE, ParserImpl.INLINE, null),
260 new Dict("abbr", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
261 new Dict("acronym", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
262 new Dict("span", Dict.VERS_FROM32, Dict.CM_INLINE, ParserImpl.INLINE, null),
263 new Dict("blink", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.INLINE, null),
264 new Dict("nobr", Dict.VERS_PROPRIETARY, Dict.CM_INLINE, ParserImpl.INLINE, null),
265 new Dict("wbr", Dict.VERS_PROPRIETARY, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
266 new Dict("marquee", Dict.VERS_MICROSOFT, (Dict.CM_INLINE | Dict.CM_OPT), ParserImpl.INLINE, null),
267 new Dict("bgsound", Dict.VERS_MICROSOFT, (Dict.CM_HEAD | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
268 new Dict("comment", Dict.VERS_MICROSOFT, Dict.CM_INLINE, ParserImpl.INLINE, null),
269 new Dict("spacer", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
270 new Dict("keygen", Dict.VERS_NETSCAPE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
// NOTE(review): "nolayer" and "ilayer" below repeat, with identical arguments, entries already
// listed earlier in this table. install() folds a repeated name into the existing entry, so these
// repeats appear to be redundant - confirm before removing them.
271 new Dict(
272 "nolayer",
273 Dict.VERS_NETSCAPE,
274 (Dict.CM_BLOCK | Dict.CM_INLINE | Dict.CM_MIXED),
275 ParserImpl.BLOCK,
276 null),
277 new Dict("ilayer", Dict.VERS_NETSCAPE, Dict.CM_INLINE, ParserImpl.INLINE, null),
278 new Dict(
279 "map",
280 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
281 Dict.CM_INLINE,
282 ParserImpl.BLOCK,
283 TagCheckImpl.MAP),
284 new Dict(
285 "area",
286 (short) (Dict.VERS_ALL & ~Dict.VERS_BASIC),
287 (Dict.CM_BLOCK | Dict.CM_EMPTY),
288 ParserImpl.EMPTY,
289 TagCheckImpl.AREA),
290 new Dict("input", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_IMG | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
291 new Dict("select", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD), ParserImpl.SELECT, null),
292 new Dict("option", Dict.VERS_ALL, (Dict.CM_FIELD | Dict.CM_OPT), ParserImpl.TEXT, null),
293 new Dict(
294 "optgroup",
295 (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC),
296 (Dict.CM_FIELD | Dict.CM_OPT),
297 ParserImpl.OPTGROUP,
298 null),
299 new Dict("textarea", Dict.VERS_ALL, (Dict.CM_INLINE | Dict.CM_FIELD), ParserImpl.TEXT, null),
300 new Dict("label", Dict.VERS_HTML40, Dict.CM_INLINE, ParserImpl.INLINE, null),
301 new Dict("legend", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
302 new Dict("button", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
303 new Dict("basefont", Dict.VERS_LOOSE, (Dict.CM_INLINE | Dict.CM_EMPTY), ParserImpl.EMPTY, null),
304 new Dict("font", Dict.VERS_LOOSE, Dict.CM_INLINE, ParserImpl.INLINE, null),
305 new Dict("bdo", (short) (Dict.VERS_HTML40 & ~Dict.VERS_BASIC), Dict.CM_INLINE, ParserImpl.INLINE, null),
306
// Ruby annotation elements (registered with VERS_XHTML11).
307 new Dict("ruby", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
308 new Dict("rbc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
309 new Dict("rtc", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
310 new Dict("rb", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
311 new Dict("rt", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
// NOTE(review): the next entry is registered under an empty tag name, so lookup("") returns it.
// This looks unintentional - confirm whether a name is missing or the entry should be dropped.
312 new Dict("", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
313 new Dict("rp", Dict.VERS_XHTML11, Dict.CM_INLINE, ParserImpl.INLINE, null),
314
315 };
316
317 /**
318 * html tag (cached by the constructor via lookup, like all the tagXxx fields below).
319 */
320 protected Dict tagHtml;
321
322 /**
323 * head tag.
324 */
325 protected Dict tagHead;
326
327 /**
328 * body tag.
329 */
330 protected Dict tagBody;
331
332 /**
333 * frameset tag.
334 */
335 protected Dict tagFrameset;
336
337 /**
338 * frame tag.
339 */
340 protected Dict tagFrame;
341
342 /**
343 * iframe tag.
344 */
345 protected Dict tagIframe;
346
347 /**
348 * noframes tag.
349 */
350 protected Dict tagNoframes;
351
352 /**
353 * meta tag.
354 */
355 protected Dict tagMeta;
356
357 /**
358 * title tag.
359 */
360 protected Dict tagTitle;
361
362 /**
363 * base tag.
364 */
365 protected Dict tagBase;
366
367 /**
368 * hr tag.
369 */
370 protected Dict tagHr;
371
372 /**
373 * pre tag.
374 */
375 protected Dict tagPre;
376
377 /**
378 * listing tag.
379 */
380 protected Dict tagListing;
381
382 /**
383 * h1 tag.
384 */
385 protected Dict tagH1;
386
387 /**
388 * h2 tag.
389 */
390 protected Dict tagH2;
391
392 /**
393 * p tag.
394 */
395 protected Dict tagP;
396
397 /**
398 * ul tag.
399 */
400 protected Dict tagUl;
401
402 /**
403 * ol tag.
404 */
405 protected Dict tagOl;
406
407 /**
408 * dir tag.
409 */
410 protected Dict tagDir;
411
412 /**
413 * li tag.
414 */
415 protected Dict tagLi;
416
417 /**
418 * dt tag.
419 */
420 protected Dict tagDt;
421
422 /**
423 * dd tag.
424 */
425 protected Dict tagDd;
426
427 /**
428 * dl tag.
429 */
430 protected Dict tagDl;
431
432 /**
433 * td tag.
434 */
435 protected Dict tagTd;
436
437 /**
438 * th tag.
439 */
440 protected Dict tagTh;
441
442 /**
443 * tr tag.
444 */
445 protected Dict tagTr;
446
447 /**
448 * col tag.
449 */
450 protected Dict tagCol;
451
452 /**
453 * colgroup tag.
454 */
455 protected Dict tagColgroup;
456
457 /**
458 * br tag.
459 */
460 protected Dict tagBr;
461
462 /**
463 * a tag.
464 */
465 protected Dict tagA;
466
467 /**
468 * link tag.
469 */
470 protected Dict tagLink;
471
472 /**
473 * b tag.
474 */
475 protected Dict tagB;
476
477 /**
478 * i tag.
479 */
480 protected Dict tagI;
481
482 /**
483 * strong tag.
484 */
485 protected Dict tagStrong;
486
487 /**
488 * em tag.
489 */
490 protected Dict tagEm;
491
492 /**
493 * big tag.
494 */
495 protected Dict tagBig;
496
497 /**
498 * small tag.
499 */
500 protected Dict tagSmall;
501
502 /**
503 * param tag.
504 */
505 protected Dict tagParam;
506
507 /**
508 * option tag.
509 */
510 protected Dict tagOption;
511
512 /**
513 * optgroup tag.
514 */
515 protected Dict tagOptgroup;
516
517 /**
518 * img tag.
519 */
520 protected Dict tagImg;
521
522 /**
523 * map tag.
524 */
525 protected Dict tagMap;
526
527 /**
528 * area tag.
529 */
530 protected Dict tagArea;
531
532 /**
533 * nobr tag.
534 */
535 protected Dict tagNobr;
536
537 /**
538 * wbr tag.
539 */
540 protected Dict tagWbr;
541
542 /**
543 * font tag.
544 */
545 protected Dict tagFont;
546
547 /**
548 * spacer tag.
549 */
550 protected Dict tagSpacer;
551
552 /**
553 * layer tag.
554 */
555 protected Dict tagLayer;
556
557 /**
558 * center tag.
559 */
560 protected Dict tagCenter;
561
562 /**
563 * style tag.
564 */
565 protected Dict tagStyle;
566
567 /**
568 * script tag.
569 */
570 protected Dict tagScript;
571
572 /**
573 * noscript tag.
574 */
575 protected Dict tagNoscript;
576
577 /**
578 * table tag.
579 */
580 protected Dict tagTable;
581
582 /**
583 * caption tag.
584 */
585 protected Dict tagCaption;
586
587 /**
588 * form tag.
589 */
590 protected Dict tagForm;
591
592 /**
593 * textarea tag.
594 */
595 protected Dict tagTextarea;
596
597 /**
598 * blockquote tag.
599 */
600 protected Dict tagBlockquote;
601
602 /**
603 * applet tag.
604 */
605 protected Dict tagApplet;
606
607 /**
608 * object tag.
609 */
610 protected Dict tagObject;
611
612 /**
613 * div tag.
614 */
615 protected Dict tagDiv;
616
617 /**
618 * span tag.
619 */
620 protected Dict tagSpan;
621
622 /**
623 * input tag.
624 */
625 protected Dict tagInput;
626
627 /**
628 * q tag.
629 */
630 protected Dict tagQ;
631
632 /**
633 * blink tag: a proprietary tag added by Tidy, along with tagNobr and tagWbr.
634 */
635 protected Dict tagBlink;
636
637 /**
638 * Head of the singly linked list of anchors (name/node pairs); null when no anchor is registered.
639 */
640 protected Anchor anchorList;
641
642 /**
643 * Current configuration; stays null until setConfiguration is called.
644 */
645 private Configuration configuration;
646
647 /**
648 * Tag definitions (Dict) keyed by tag name.
649 */
650 private Map tagHashtable = new Hashtable();
651
/**
 * Builds a tag table pre-populated with the built-in tags.
 * <p>
 * Every entry of <code>TAGS</code> is installed first; the definitions that the rest of the code refers to
 * directly are then looked up by name and cached in the corresponding <code>tagXxx</code> fields.
 * </p>
 */
protected TagTable()
{
    // Register every built-in definition before any of them is looked up.
    int index = 0;
    while (index < TAGS.length)
    {
        install(TAGS[index]);
        index++;
    }

    // Cache the definitions that are referenced through fields.
    this.tagHtml = lookup("html");
    this.tagHead = lookup("head");
    this.tagBody = lookup("body");
    this.tagFrameset = lookup("frameset");
    this.tagFrame = lookup("frame");
    this.tagIframe = lookup("iframe");
    this.tagNoframes = lookup("noframes");
    this.tagMeta = lookup("meta");
    this.tagTitle = lookup("title");
    this.tagBase = lookup("base");
    this.tagHr = lookup("hr");
    this.tagPre = lookup("pre");
    this.tagListing = lookup("listing");
    this.tagH1 = lookup("h1");
    this.tagH2 = lookup("h2");
    this.tagP = lookup("p");
    this.tagUl = lookup("ul");
    this.tagOl = lookup("ol");
    this.tagDir = lookup("dir");
    this.tagLi = lookup("li");
    this.tagDt = lookup("dt");
    this.tagDd = lookup("dd");
    this.tagDl = lookup("dl");
    this.tagTd = lookup("td");
    this.tagTh = lookup("th");
    this.tagTr = lookup("tr");
    this.tagCol = lookup("col");
    this.tagColgroup = lookup("colgroup");
    this.tagBr = lookup("br");
    this.tagA = lookup("a");
    this.tagLink = lookup("link");
    this.tagB = lookup("b");
    this.tagI = lookup("i");
    this.tagStrong = lookup("strong");
    this.tagEm = lookup("em");
    this.tagBig = lookup("big");
    this.tagSmall = lookup("small");
    this.tagParam = lookup("param");
    this.tagOption = lookup("option");
    this.tagOptgroup = lookup("optgroup");
    this.tagImg = lookup("img");
    this.tagMap = lookup("map");
    this.tagArea = lookup("area");
    this.tagNobr = lookup("nobr");
    this.tagWbr = lookup("wbr");
    this.tagFont = lookup("font");
    this.tagSpacer = lookup("spacer");
    this.tagLayer = lookup("layer");
    this.tagCenter = lookup("center");
    this.tagStyle = lookup("style");
    this.tagScript = lookup("script");
    this.tagNoscript = lookup("noscript");
    this.tagTable = lookup("table");
    this.tagCaption = lookup("caption");
    this.tagForm = lookup("form");
    this.tagTextarea = lookup("textarea");
    this.tagBlockquote = lookup("blockquote");
    this.tagApplet = lookup("applet");
    this.tagObject = lookup("object");
    this.tagDiv = lookup("div");
    this.tagSpan = lookup("span");
    this.tagInput = lookup("input");
    this.tagQ = lookup("q");
    this.tagBlink = lookup("blink");
}
726
727 /**
728 * Setter for the current configuration instance.
729 * @param configuration configuration instance
730 */
731 public void setConfiguration(Configuration configuration)
732 {
733 this.configuration = configuration;
734 }
735
736 /**
737 * Lookup a tag definition by its name.
738 * @param name tag name
739 * @return tag definition (Dict)
740 */
741 public Dict lookup(String name)
742 {
743 return (Dict) tagHashtable.get(name);
744 }
745
746 /**
747 * Installs a new tag in the tag table, or modify an existing one.
748 * @param dict tag definition
749 * @return installed Dict instance
750 */
751 public Dict install(Dict dict)
752 {
753 Dict d = (Dict) tagHashtable.get(dict.name);
754 if (d != null)
755 {
756 d.versions = dict.versions;
757 d.model |= dict.model;
758 d.setParser(dict.getParser());
759 d.setChkattrs(dict.getChkattrs());
760 return d;
761 }
762
763 tagHashtable.put(dict.name, dict);
764 return dict;
765
766 }
767
768 /**
769 * Finds a tag by name.
770 * @param node Node to find. If the element is found the tag property of node will be set.
771 * @return true if the tag is found, false otherwise
772 */
773 public boolean findTag(Node node)
774 {
775 Dict np;
776
777 if (configuration != null && configuration.xmlTags)
778 {
779 node.tag = XML_TAGS;
780 return true;
781 }
782
783 if (node.element != null)
784 {
785 np = lookup(node.element);
786 if (np != null)
787 {
788 node.tag = np;
789 return true;
790 }
791 }
792
793 return false;
794 }
795
796 /**
797 * Finds a parser fo the given node.
798 * @param node Node
799 * @return parser for the node
800 */
801 public Parser findParser(Node node)
802 {
803 Dict np;
804
805 if (node.element != null)
806 {
807 np = lookup(node.element);
808 if (np != null)
809 {
810 return np.getParser();
811 }
812 }
813
814 return null;
815 }
816
817 /**
818 * May id or name serve as anchor?
819 * @param node Node
820 * @return <code>true</code> if tag can serve as an anchor
821 */
822 boolean isAnchorElement(Node node)
823 {
824 return node.tag == this.tagA
825 || node.tag == this.tagApplet
826 || node.tag == this.tagForm
827 || node.tag == this.tagFrame
828 || node.tag == this.tagIframe
829 || node.tag == this.tagImg
830 || node.tag == this.tagMap;
831 }
832
833 /**
834 * Defines a new tag.
835 * @param tagType tag type. Can be TAGTYPE_BLOCK | TAGTYPE_EMPTY | TAGTYPE_PRE | TAGTYPE_INLINE
836 * @param name tag name
837 */
838 public void defineTag(short tagType, String name)
839 {
840 Parser tagParser;
841 short model;
842
843 switch (tagType)
844 {
845 case Dict.TAGTYPE_BLOCK :
846 model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
847 tagParser = ParserImpl.BLOCK;
848 break;
849
850 case Dict.TAGTYPE_EMPTY :
851 model = (short) (Dict.CM_EMPTY | Dict.CM_NO_INDENT | Dict.CM_NEW);
852 tagParser = ParserImpl.BLOCK;
853 break;
854
855 case Dict.TAGTYPE_PRE :
856 model = (short) (Dict.CM_BLOCK | Dict.CM_NO_INDENT | Dict.CM_NEW);
857 tagParser = ParserImpl.PRE;
858 break;
859
860 case Dict.TAGTYPE_INLINE :
861 default :
862
863 model = (short) (Dict.CM_INLINE | Dict.CM_NO_INDENT | Dict.CM_NEW);
864 tagParser = ParserImpl.INLINE;
865 break;
866 }
867
868 install(new Dict(name, Dict.VERS_PROPRIETARY, model, tagParser, null));
869 }
870
871 /**
872 * return a List containing all the user-defined tag names.
873 * @param tagType one of Dict.TAGTYPE_EMPTY | Dict.TAGTYPE_INLINE | Dict.TAGTYPE_BLOCK | Dict.TAGTYPE_PRE
874 * @return List containing all the user-defined tag names
875 */
876 List findAllDefinedTag(short tagType)
877 {
878 List tagNames = new ArrayList();
879
880 Iterator iterator = tagHashtable.values().iterator();
881 while (iterator.hasNext())
882 {
883 Dict curDictEntry = (Dict) iterator.next();
884
885 if (curDictEntry != null)
886 {
887 switch (tagType)
888 {
889
890 case Dict.TAGTYPE_EMPTY :
891 if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
892 && ((curDictEntry.model & Dict.CM_EMPTY) == Dict.CM_EMPTY)
893 &&
894 (curDictEntry != tagWbr))
895 {
896 tagNames.add(curDictEntry.name);
897 }
898 break;
899
900
901 case Dict.TAGTYPE_INLINE :
902 if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
903 && ((curDictEntry.model & Dict.CM_INLINE) == Dict.CM_INLINE)
904 &&
905 (curDictEntry != tagBlink)
906 && (curDictEntry != tagNobr)
907 && (curDictEntry != tagWbr))
908 {
909 tagNames.add(curDictEntry.name);
910 }
911 break;
912
913
914 case Dict.TAGTYPE_BLOCK :
915 if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
916 && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
917 && (curDictEntry.getParser() == ParserImpl.BLOCK))
918 {
919 tagNames.add(curDictEntry.name);
920 }
921 break;
922
923 case Dict.TAGTYPE_PRE :
924 if ((curDictEntry.versions == Dict.VERS_PROPRIETARY)
925 && ((curDictEntry.model & Dict.CM_BLOCK) == Dict.CM_BLOCK)
926 && (curDictEntry.getParser() == ParserImpl.PRE))
927 {
928 tagNames.add(curDictEntry.name);
929 }
930 break;
931 }
932 }
933 }
934
935 return tagNames;
936 }
937
938 /**
939 * Free node's attributes.
940 * @param node Node
941 */
942 public void freeAttrs(Node node)
943 {
944 while (node.attributes != null)
945 {
946 AttVal av = node.attributes;
947 if ("id".equalsIgnoreCase(av.attribute) || "name".equalsIgnoreCase(av.attribute) && isAnchorElement(node))
948 {
949 removeAnchorByNode(node);
950 }
951
952 node.attributes = av.next;
953 }
954 }
955
956 /**
957 * Removes anchor for specific node.
958 * @param node Node
959 */
960 void removeAnchorByNode(Node node)
961 {
962 Anchor delme = null;
963 Anchor found = null;
964 Anchor prev = null;
965 Anchor next = null;
966
967 for (found = anchorList; found != null; found = found.next)
968 {
969 next = found.next;
970
971 if (found.node == node)
972 {
973 if (prev != null)
974 {
975 prev.next = next;
976 }
977 else
978 {
979 anchorList = next;
980 }
981
982 delme = found;
983 }
984 else
985 {
986 prev = found;
987 }
988 }
989 if (delme != null)
990 {
991 delme = null;
992 }
993 }
994
995 /**
996 * Initialize a new anchor.
997 * @return a new anchor element
998 */
999 Anchor newAnchor()
1000 {
1001 Anchor a = new Anchor();
1002 return a;
1003 }
1004
1005 /**
1006 * Adds a new anchor to namespace.
1007 * @param name anchor name
1008 * @param node destination for this anchor
1009 * @return Anchor
1010 */
1011 Anchor addAnchor(String name, Node node)
1012 {
1013 Anchor a = newAnchor();
1014
1015 a.name = name;
1016 a.node = node;
1017
1018 if (anchorList == null)
1019 {
1020 anchorList = a;
1021 }
1022 else
1023 {
1024 Anchor here = anchorList;
1025
1026 while (here.next != null)
1027 {
1028 here = here.next;
1029 }
1030 here.next = a;
1031 }
1032
1033 return anchorList;
1034 }
1035
1036 /**
1037 * Return node associated with anchor.
1038 * @param name anchor name
1039 * @return node associated with anchor
1040 */
1041 Node getNodeByAnchor(String name)
1042 {
1043 Anchor found;
1044
1045 for (found = anchorList; found != null; found = found.next)
1046 {
1047 if (name.equalsIgnoreCase(found.name))
1048 {
1049 break;
1050 }
1051 }
1052
1053 if (found != null)
1054 {
1055 return found.node;
1056 }
1057
1058 return null;
1059 }
1060
1061 /**
1062 * free all anchors.
1063 */
1064 void freeAnchors()
1065 {
1066 anchorList = null;
1067 }
1068
1069 }