1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 /***
57 * HTML Parser implementation.
58 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
59 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
60 * @author Fabrizio Giustina
61 * @version $Revision: 1.53 $ ($Author: fgiust $)
62 */
63 public final class ParserImpl
64 {
65
66 /***
67 * parser for html.
68 */
69 public static final Parser HTML = new ParseHTML();
70
71 /***
72 * parser for head.
73 */
74 public static final Parser HEAD = new ParseHead();
75
76 /***
77 * parser for title.
78 */
79 public static final Parser TITLE = new ParseTitle();
80
81 /***
82 * parser for script.
83 */
84 public static final Parser SCRIPT = new ParseScript();
85
86 /***
87 * parser for body.
88 */
89 public static final Parser BODY = new ParseBody();
90
91 /***
92 * parser for frameset.
93 */
94 public static final Parser FRAMESET = new ParseFrameSet();
95
96 /***
97 * parser for inline.
98 */
99 public static final Parser INLINE = new ParseInline();
100
101 /***
102 * parser for list.
103 */
104 public static final Parser LIST = new ParseList();
105
106 /***
107 * parser for definition lists.
108 */
109 public static final Parser DEFLIST = new ParseDefList();
110
111 /***
112 * parser for pre.
113 */
114 public static final Parser PRE = new ParsePre();
115
116 /***
117 * parser for block elements.
118 */
119 public static final Parser BLOCK = new ParseBlock();
120
121 /***
122 * parser for table.
123 */
124 public static final Parser TABLETAG = new ParseTableTag();
125
126 /***
127 * parser for colgroup.
128 */
129 public static final Parser COLGROUP = new ParseColGroup();
130
131 /***
132 * parser for rowgroup.
133 */
134 public static final Parser ROWGROUP = new ParseRowGroup();
135
136 /***
137 * parser for row.
138 */
139 public static final Parser ROW = new ParseRow();
140
141 /***
142 * parser for noframes.
143 */
144 public static final Parser NOFRAMES = new ParseNoFrames();
145
146 /***
147 * parser for select.
148 */
149 public static final Parser SELECT = new ParseSelect();
150
151 /***
152 * parser for text.
153 */
154 public static final Parser TEXT = new ParseText();
155
156 /***
157 * parser for empty elements.
158 */
159 public static final Parser EMPTY = new ParseEmpty();
160
161 /***
162 * parser for optgroup.
163 */
164 public static final Parser OPTGROUP = new ParseOptGroup();
165
/**
 * Hidden constructor: ParserImpl should not be instantiated.
 */
private ParserImpl() {
    // intentionally empty
}
173
174 /***
175 * @param lexer
176 * @param node
177 * @param mode
178 */
179 protected static void parseTag(Lexer lexer, Node node, short mode)
180 {
181
182
183 if ((node.tag.model & Dict.CM_EMPTY) != 0)
184 {
185 lexer.waswhite = false;
186 }
187 else if (!((node.tag.model & Dict.CM_INLINE) != 0))
188 {
189 lexer.insertspace = false;
190 }
191
192 if (node.tag.getParser() == null)
193 {
194 return;
195 }
196
197 if (node.type == Node.START_END_TAG)
198 {
199 Node.trimEmptyElement(lexer, node);
200 return;
201 }
202
203 node.tag.getParser().parse(lexer, node, mode);
204 }
205
206 /***
207 * Move node to the head, where element is used as starting point in hunt for head. Normally called during parsing.
208 * @param lexer
209 * @param element
210 * @param node
211 */
212 protected static void moveToHead(Lexer lexer, Node element, Node node)
213 {
214 Node head;
215 node.removeNode();
216
217 TagTable tt = lexer.configuration.tt;
218
219 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
220 {
221 lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
222
223 while (element.tag != tt.tagHtml)
224 {
225 element = element.parent;
226 }
227
228 for (head = element.content; head != null; head = head.next)
229 {
230 if (head.tag == tt.tagHead)
231 {
232 head.insertNodeAtEnd(node);
233 break;
234 }
235 }
236
237 if (node.tag.getParser() != null)
238 {
239 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
240 }
241 }
242 else
243 {
244 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
245 }
246 }
247
248 /***
249 * moves given node to end of body element.
250 * @param lexer Lexer
251 * @param node Node to insert
252 */
253 static void moveNodeToBody(Lexer lexer, Node node)
254 {
255 node.removeNode();
256 Node body = lexer.root.findBody(lexer.configuration.tt);
257 body.insertNodeAtEnd(node);
258 }
259
260 /***
261 * Parser for HTML.
262 */
263 public static class ParseHTML implements Parser
264 {
265
266 /***
267 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
268 */
269 public void parse(Lexer lexer, Node html, short mode)
270 {
271 Node node, head;
272 Node frameset = null;
273 Node noframes = null;
274
275 lexer.configuration.xmlTags = false;
276 lexer.seenEndBody = false;
277 TagTable tt = lexer.configuration.tt;
278
279 while (true)
280 {
281 node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
282
283 if (node == null)
284 {
285 node = lexer.inferredTag("head");
286 break;
287 }
288
289 if (node.tag == tt.tagHead)
290 {
291 break;
292 }
293
294 if (node.tag == html.tag && node.type == Node.END_TAG)
295 {
296 lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
297 continue;
298 }
299
300
301 if (Node.insertMisc(html, node))
302 {
303 continue;
304 }
305
306 lexer.ungetToken();
307 node = lexer.inferredTag("head");
308 break;
309 }
310
311 head = node;
312 html.insertNodeAtEnd(head);
313 HEAD.parse(lexer, head, mode);
314
315 while (true)
316 {
317 node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
318
319 if (node == null)
320 {
321 if (frameset == null)
322 {
323
324 node = lexer.inferredTag("body");
325 html.insertNodeAtEnd(node);
326 BODY.parse(lexer, node, mode);
327 }
328
329 return;
330 }
331
332
333 if (node.tag == html.tag)
334 {
335 if (node.type != Node.START_TAG && frameset == null)
336 {
337 lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
338 }
339 else if (node.type == Node.END_TAG)
340 {
341 lexer.seenEndHtml = true;
342 }
343
344 continue;
345 }
346
347
348 if (Node.insertMisc(html, node))
349 {
350 continue;
351 }
352
353
354 if (node.tag == tt.tagBody)
355 {
356 if (node.type != Node.START_TAG)
357 {
358 lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
359 continue;
360 }
361
362 if (frameset != null)
363 {
364 lexer.ungetToken();
365
366 if (noframes == null)
367 {
368 noframes = lexer.inferredTag("noframes");
369 frameset.insertNodeAtEnd(noframes);
370 lexer.report.warning(lexer, html, noframes, Report.INSERTING_TAG);
371 }
372
373 parseTag(lexer, noframes, mode);
374 continue;
375 }
376
377 lexer.constrainVersion(~Dict.VERS_FRAMESET);
378 break;
379 }
380
381
382 if (node.tag == tt.tagFrameset)
383 {
384 if (node.type != Node.START_TAG)
385 {
386 lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
387 continue;
388 }
389
390 if (frameset != null)
391 {
392 lexer.report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
393 }
394 else
395 {
396 frameset = node;
397 }
398
399 html.insertNodeAtEnd(node);
400 parseTag(lexer, node, mode);
401
402
403
404 for (node = frameset.content; node != null; node = node.next)
405 {
406 if (node.tag == tt.tagNoframes)
407 {
408 noframes = node;
409 }
410 }
411 continue;
412 }
413
414
415 if (node.tag == tt.tagNoframes)
416 {
417 if (node.type != Node.START_TAG)
418 {
419 lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
420 continue;
421 }
422
423 if (frameset == null)
424 {
425 lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
426 node = lexer.inferredTag("body");
427 break;
428 }
429
430 if (noframes == null)
431 {
432 noframes = node;
433 frameset.insertNodeAtEnd(noframes);
434 }
435
436 parseTag(lexer, noframes, mode);
437 continue;
438 }
439
440 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
441 {
442 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
443 {
444 moveToHead(lexer, html, node);
445 continue;
446 }
447
448
449 if (frameset != null && node.tag == tt.tagFrame)
450 {
451 lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
452 continue;
453 }
454 }
455
456 lexer.ungetToken();
457
458
459 if (frameset != null)
460 {
461 if (noframes == null)
462 {
463 noframes = lexer.inferredTag("noframes");
464 frameset.insertNodeAtEnd(noframes);
465 }
466 else
467 {
468 lexer.report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
469 }
470
471 lexer.constrainVersion(Dict.VERS_FRAMESET);
472 parseTag(lexer, noframes, mode);
473 continue;
474 }
475
476 node = lexer.inferredTag("body");
477 lexer.constrainVersion(~Dict.VERS_FRAMESET);
478 break;
479 }
480
481
482 html.insertNodeAtEnd(node);
483 parseTag(lexer, node, mode);
484 lexer.seenEndHtml = true;
485 }
486
487 }
488
489 /***
490 * Parser for HEAD.
491 */
492 public static class ParseHead implements Parser
493 {
494
495 /***
496 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
497 */
498 public void parse(Lexer lexer, Node head, short mode)
499 {
500 Node node;
501 int hasTitle = 0;
502 int hasBase = 0;
503 TagTable tt = lexer.configuration.tt;
504
505 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
506 {
507 if (node.tag == head.tag && node.type == Node.END_TAG)
508 {
509 head.closed = true;
510 break;
511 }
512
513 if (node.type == Node.TEXT_NODE)
514 {
515 lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
516 lexer.ungetToken();
517 break;
518 }
519
520
521 if (Node.insertMisc(head, node))
522 {
523 continue;
524 }
525
526 if (node.type == Node.DOCTYPE_TAG)
527 {
528 Node.insertDocType(lexer, head, node);
529 continue;
530 }
531
532
533 if (node.tag == null)
534 {
535 lexer.report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
536 continue;
537 }
538
539 if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD))
540 {
541
542 if (lexer.isvoyager)
543 {
544 lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
545 }
546 lexer.ungetToken();
547 break;
548 }
549
550 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
551 {
552 if (node.tag == tt.tagTitle)
553 {
554 ++hasTitle;
555
556 if (hasTitle > 1)
557 {
558 lexer.report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
559 }
560 }
561 else if (node.tag == tt.tagBase)
562 {
563 ++hasBase;
564
565 if (hasBase > 1)
566 {
567 lexer.report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
568 }
569 }
570 else if (node.tag == tt.tagNoscript)
571 {
572 lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
573 }
574
575 head.insertNodeAtEnd(node);
576 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
577 continue;
578 }
579
580
581 lexer.report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
582 }
583
584 if (hasTitle == 0)
585 {
586 if (!lexer.configuration.bodyOnly)
587 {
588 lexer.report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
589 }
590 head.insertNodeAtEnd(lexer.inferredTag("title"));
591 }
592 }
593
594 }
595
596 /***
597 * Parser for TITLE.
598 */
599 public static class ParseTitle implements Parser
600 {
601
602 /***
603 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
604 */
605 public void parse(Lexer lexer, Node title, short mode)
606 {
607 Node node;
608
609 while ((node = lexer.getToken(Lexer.MIXED_CONTENT)) != null)
610 {
611
612 if (node.tag == title.tag && node.type == Node.START_TAG)
613 {
614 lexer.report.warning(lexer, title, node, Report.COERCE_TO_ENDTAG);
615 node.type = Node.END_TAG;
616 continue;
617 }
618 else if (node.tag == title.tag && node.type == Node.END_TAG)
619 {
620 title.closed = true;
621 Node.trimSpaces(lexer, title);
622 return;
623 }
624
625 if (node.type == Node.TEXT_NODE)
626 {
627
628 if (title.content == null)
629 {
630 Node.trimInitialSpace(lexer, title, node);
631 }
632
633 if (node.start >= node.end)
634 {
635 continue;
636 }
637
638 title.insertNodeAtEnd(node);
639 continue;
640 }
641
642
643 if (Node.insertMisc(title, node))
644 {
645 continue;
646 }
647
648
649 if (node.tag == null)
650 {
651 lexer.report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
652 continue;
653 }
654
655
656 lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
657 lexer.ungetToken();
658 Node.trimSpaces(lexer, title);
659 return;
660 }
661
662 lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
663 }
664
665 }
666
667 /***
668 * Parser for SCRIPT.
669 */
670 public static class ParseScript implements Parser
671 {
672
673 /***
674 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
675 */
676 public void parse(Lexer lexer, Node script, short mode)
677 {
678
679
680
681
682 Node node = lexer.getCDATA(script);
683
684 if (node != null)
685 {
686 script.insertNodeAtEnd(node);
687 }
688 }
689
690 }
691
692 /***
693 * Parser for BODY.
694 */
695 public static class ParseBody implements Parser
696 {
697
698 /***
699 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
700 */
701 public void parse(Lexer lexer, Node body, short mode)
702 {
703 Node node;
704 boolean checkstack, iswhitenode;
705
706 mode = Lexer.IGNORE_WHITESPACE;
707 checkstack = true;
708 TagTable tt = lexer.configuration.tt;
709
710 Clean.bumpObject(lexer, body.parent);
711
712 while ((node = lexer.getToken(mode)) != null)
713 {
714
715
716 if (node.tag == tt.tagHtml)
717 {
718 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG || lexer.seenEndHtml)
719 {
720 lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
721 }
722 else
723 {
724 lexer.seenEndHtml = true;
725 }
726
727 continue;
728 }
729
730 if (lexer.seenEndBody
731 && (node.type == Node.START_TAG || node.type == Node.END_TAG || node.type == Node.START_END_TAG))
732 {
733 lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
734 }
735
736 if (node.tag == body.tag && node.type == Node.END_TAG)
737 {
738 body.closed = true;
739 Node.trimSpaces(lexer, body);
740 lexer.seenEndBody = true;
741 mode = Lexer.IGNORE_WHITESPACE;
742
743 if (body.parent.tag == tt.tagNoframes)
744 {
745 break;
746 }
747
748 continue;
749 }
750
751 if (node.tag == tt.tagNoframes)
752 {
753 if (node.type == Node.START_TAG)
754 {
755 body.insertNodeAtEnd(node);
756 BLOCK.parse(lexer, node, mode);
757 continue;
758 }
759
760 if (node.type == Node.END_TAG && body.parent.tag == tt.tagNoframes)
761 {
762 Node.trimSpaces(lexer, body);
763 lexer.ungetToken();
764 break;
765 }
766 }
767
768 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset) && body.parent.tag == tt.tagNoframes)
769 {
770 Node.trimSpaces(lexer, body);
771 lexer.ungetToken();
772 break;
773 }
774
775 iswhitenode = false;
776
777 if (node.type == Node.TEXT_NODE
778 && node.end <= node.start + 1
779 && node.textarray[node.start] == (byte) ' ')
780 {
781 iswhitenode = true;
782 }
783
784
785 if (Node.insertMisc(body, node))
786 {
787 continue;
788 }
789
790
791
792
793
794
795
796
797
798 if (node.type == Node.TEXT_NODE)
799 {
800 if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE)
801 {
802 continue;
803 }
804
805 if (lexer.configuration.encloseBodyText && !iswhitenode)
806 {
807 Node para;
808
809 lexer.ungetToken();
810 para = lexer.inferredTag("p");
811 body.insertNodeAtEnd(para);
812 parseTag(lexer, para, mode);
813 mode = Lexer.MIXED_CONTENT;
814 continue;
815 }
816
817
818 lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20));
819
820 if (checkstack)
821 {
822 checkstack = false;
823
824 if (lexer.inlineDup(node) > 0)
825 {
826 continue;
827 }
828 }
829
830 body.insertNodeAtEnd(node);
831 mode = Lexer.MIXED_CONTENT;
832 continue;
833 }
834
835 if (node.type == Node.DOCTYPE_TAG)
836 {
837 Node.insertDocType(lexer, body, node);
838 continue;
839 }
840
841 if (node.tag == null || node.tag == tt.tagParam)
842 {
843 lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
844 continue;
845 }
846
847
848
849
850 lexer.excludeBlocks = false;
851
852 if ((!((node.tag.model & Dict.CM_BLOCK) != 0) && !((node.tag.model & Dict.CM_INLINE) != 0))
853 || node.tag == tt.tagInput)
854 {
855
856 if (!((node.tag.model & Dict.CM_HEAD) != 0))
857 {
858 lexer.report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
859 }
860
861 if ((node.tag.model & Dict.CM_HTML) != 0)
862 {
863
864 if (node.tag == tt.tagBody && body.implicit && body.attributes == null)
865 {
866 body.attributes = node.attributes;
867 node.attributes = null;
868 }
869
870 continue;
871 }
872
873 if ((node.tag.model & Dict.CM_HEAD) != 0)
874 {
875 moveToHead(lexer, body, node);
876 continue;
877 }
878
879 if ((node.tag.model & Dict.CM_LIST) != 0)
880 {
881 lexer.ungetToken();
882 node = lexer.inferredTag("ul");
883 node.addClass("noindent");
884 lexer.excludeBlocks = true;
885 }
886 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
887 {
888 lexer.ungetToken();
889 node = lexer.inferredTag("dl");
890 lexer.excludeBlocks = true;
891 }
892 else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
893 {
894 lexer.ungetToken();
895 node = lexer.inferredTag("table");
896 lexer.excludeBlocks = true;
897 }
898 else if (node.tag == tt.tagInput)
899 {
900 lexer.ungetToken();
901 node = lexer.inferredTag("form");
902 lexer.excludeBlocks = true;
903 }
904 else
905 {
906 if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
907 {
908 lexer.ungetToken();
909 return;
910 }
911
912
913 continue;
914 }
915 }
916
917 if (node.type == Node.END_TAG)
918 {
919 if (node.tag == tt.tagBr)
920 {
921 node.type = Node.START_TAG;
922 }
923 else if (node.tag == tt.tagP)
924 {
925 Node.coerceNode(lexer, node, tt.tagBr);
926 body.insertNodeAtEnd(node);
927 node = lexer.inferredTag("br");
928 }
929 else if ((node.tag.model & Dict.CM_INLINE) != 0)
930 {
931 lexer.popInline(node);
932 }
933 }
934
935 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
936 {
937 if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
938 {
939
940
941 if (node.tag == tt.tagImg)
942 {
943 lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
944 }
945 else
946 {
947 lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20));
948 }
949
950 if (checkstack && !node.implicit)
951 {
952 checkstack = false;
953
954 if (lexer.inlineDup(node) > 0)
955 {
956 continue;
957 }
958 }
959
960 mode = Lexer.MIXED_CONTENT;
961 }
962 else
963 {
964 checkstack = true;
965 mode = Lexer.IGNORE_WHITESPACE;
966 }
967
968 if (node.implicit)
969 {
970 lexer.report.warning(lexer, body, node, Report.INSERTING_TAG);
971 }
972
973 body.insertNodeAtEnd(node);
974 parseTag(lexer, node, mode);
975 continue;
976 }
977
978
979 lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
980 }
981 }
982
983 }
984
985 /***
986 * Parser for FRAMESET.
987 */
988 public static class ParseFrameSet implements Parser
989 {
990
991 /***
992 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
993 */
994 public void parse(Lexer lexer, Node frameset, short mode)
995 {
996 Node node;
997 TagTable tt = lexer.configuration.tt;
998
999 lexer.badAccess |= Report.USING_FRAMES;
1000
1001 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
1002 {
1003 if (node.tag == frameset.tag && node.type == Node.END_TAG)
1004 {
1005 frameset.closed = true;
1006 Node.trimSpaces(lexer, frameset);
1007 return;
1008 }
1009
1010
1011 if (Node.insertMisc(frameset, node))
1012 {
1013 continue;
1014 }
1015
1016 if (node.tag == null)
1017 {
1018 lexer.report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
1019 continue;
1020 }
1021
1022 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
1023 {
1024 if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
1025 {
1026 moveToHead(lexer, frameset, node);
1027 continue;
1028 }
1029 }
1030
1031 if (node.tag == tt.tagBody)
1032 {
1033 lexer.ungetToken();
1034 node = lexer.inferredTag("noframes");
1035 lexer.report.warning(lexer, frameset, node, Report.INSERTING_TAG);
1036 }
1037
1038 if (node.type == Node.START_TAG && (node.tag.model & Dict.CM_FRAMES) != 0)
1039 {
1040 frameset.insertNodeAtEnd(node);
1041 lexer.excludeBlocks = false;
1042 parseTag(lexer, node, Lexer.MIXED_CONTENT);
1043 continue;
1044 }
1045 else if (node.type == Node.START_END_TAG && (node.tag.model & Dict.CM_FRAMES) != 0)
1046 {
1047 frameset.insertNodeAtEnd(node);
1048 continue;
1049 }
1050
1051
1052 lexer.report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
1053 }
1054
1055 lexer.report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR);
1056 }
1057
1058 }
1059
1060 /***
1061 * Parser for INLINE.
1062 */
1063 public static class ParseInline implements Parser
1064 {
1065
1066 /***
1067 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1068 */
1069 public void parse(Lexer lexer, Node element, short mode)
1070 {
1071 Node node, parent;
1072 TagTable tt = lexer.configuration.tt;
1073
1074 if (TidyUtils.toBoolean(element.tag.model & Dict.CM_EMPTY))
1075 {
1076 return;
1077 }
1078
1079
1080
1081
1082
1083
1084 if (TidyUtils.toBoolean(element.tag.model & Dict.CM_BLOCK) || (element.tag == tt.tagDt))
1085 {
1086 lexer.inlineDup(null);
1087 }
1088 else if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE))
1089 {
1090
1091 lexer.pushInline(element);
1092 }
1093
1094 if (element.tag == tt.tagNobr)
1095 {
1096 lexer.badLayout |= Report.USING_NOBR;
1097 }
1098 else if (element.tag == tt.tagFont)
1099 {
1100 lexer.badLayout |= Report.USING_FONT;
1101 }
1102
1103
1104 if (mode != Lexer.PREFORMATTED)
1105 {
1106 mode = Lexer.MIXED_CONTENT;
1107 }
1108
1109 while ((node = lexer.getToken(mode)) != null)
1110 {
1111
1112 if (node.tag == element.tag && node.type == Node.END_TAG)
1113 {
1114 if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE))
1115 {
1116 lexer.popInline(node);
1117 }
1118
1119 if (!TidyUtils.toBoolean(mode & Lexer.PREFORMATTED))
1120 {
1121 Node.trimSpaces(lexer, element);
1122 }
1123
1124
1125
1126
1127 if (element.tag == tt.tagFont && element.content != null && element.content == element.last)
1128 {
1129 Node child = element.content;
1130
1131 if (child.tag == tt.tagA)
1132 {
1133 child.parent = element.parent;
1134 child.next = element.next;
1135 child.prev = element.prev;
1136
1137 if (child.prev != null)
1138 {
1139 child.prev.next = child;
1140 }
1141 else
1142 {
1143 child.parent.content = child;
1144 }
1145
1146 if (child.next != null)
1147 {
1148 child.next.prev = child;
1149 }
1150 else
1151 {
1152 child.parent.last = child;
1153 }
1154
1155 element.next = null;
1156 element.prev = null;
1157 element.parent = child;
1158 element.content = child.content;
1159 element.last = child.last;
1160 child.content = element;
1161 child.last = element;
1162 for (child = element.content; child != null; child = child.next)
1163 {
1164 child.parent = element;
1165 }
1166 }
1167 }
1168 element.closed = true;
1169 Node.trimSpaces(lexer, element);
1170 Node.trimEmptyElement(lexer, element);
1171 return;
1172 }
1173
1174
1175
1176
1177 if (node.type == Node.START_TAG
1178 && node.tag == element.tag
1179 && lexer.isPushed(node)
1180 && !node.implicit
1181 && !element.implicit
1182 && node.tag != null
1183 && ((node.tag.model & Dict.CM_INLINE) != 0)
1184 && node.tag != tt.tagA
1185 && node.tag != tt.tagFont
1186 && node.tag != tt.tagBig
1187 && node.tag != tt.tagSmall
1188 && node.tag != tt.tagQ)
1189 {
1190 if (element.content != null && node.attributes == null)
1191 {
1192 lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1193 node.type = Node.END_TAG;
1194 lexer.ungetToken();
1195 continue;
1196 }
1197
1198 lexer.report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
1199 }
1200 else if (lexer.isPushed(node) && node.type == Node.START_TAG && node.tag == tt.tagQ)
1201 {
1202 lexer.report.warning(lexer, element, node, Report.NESTED_QUOTATION);
1203 }
1204
1205 if (node.type == Node.TEXT_NODE)
1206 {
1207
1208 if (element.content == null && !TidyUtils.toBoolean(mode & Lexer.PREFORMATTED))
1209 {
1210 Node.trimSpaces(lexer, element);
1211 }
1212
1213 if (node.start >= node.end)
1214 {
1215 continue;
1216 }
1217
1218 element.insertNodeAtEnd(node);
1219 continue;
1220 }
1221
1222
1223 if (Node.insertMisc(element, node))
1224 {
1225 continue;
1226 }
1227
1228
1229 if (node.tag == tt.tagHtml)
1230 {
1231 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
1232 {
1233 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1234 continue;
1235 }
1236
1237
1238 lexer.ungetToken();
1239 if (!((mode & Lexer.PREFORMATTED) != 0))
1240 {
1241 Node.trimSpaces(lexer, element);
1242 }
1243 Node.trimEmptyElement(lexer, element);
1244 return;
1245 }
1246
1247
1248 if (node.tag == tt.tagP
1249 && node.type == Node.START_TAG
1250 && ((mode & Lexer.PREFORMATTED) != 0 || element.tag == tt.tagDt || element.isDescendantOf(tt.tagDt)))
1251 {
1252 node.tag = tt.tagBr;
1253 node.element = "br";
1254 Node.trimSpaces(lexer, element);
1255 element.insertNodeAtEnd(node);
1256 continue;
1257 }
1258
1259
1260 if (node.tag == null || node.tag == tt.tagParam)
1261 {
1262 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1263 continue;
1264 }
1265
1266 if (node.tag == tt.tagBr && node.type == Node.END_TAG)
1267 {
1268 node.type = Node.START_TAG;
1269 }
1270
1271 if (node.type == Node.END_TAG)
1272 {
1273
1274 if (node.tag == tt.tagBr)
1275 {
1276 node.type = Node.START_TAG;
1277 }
1278 else if (node.tag == tt.tagP)
1279 {
1280
1281 if (!element.isDescendantOf(tt.tagP))
1282 {
1283 Node.coerceNode(lexer, node, tt.tagBr);
1284 Node.trimSpaces(lexer, element);
1285 element.insertNodeAtEnd(node);
1286 node = lexer.inferredTag("br");
1287 continue;
1288 }
1289 }
1290 else if ((node.tag.model & Dict.CM_INLINE) != 0
1291 && node.tag != tt.tagA
1292 && !((node.tag.model & Dict.CM_OBJECT) != 0)
1293 && (element.tag.model & Dict.CM_INLINE) != 0)
1294 {
1295
1296 lexer.popInline(element);
1297
1298 if (element.tag != tt.tagA)
1299 {
1300 if (node.tag == tt.tagA && node.tag != element.tag)
1301 {
1302 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1303 lexer.ungetToken();
1304 }
1305 else
1306 {
1307 lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1308 }
1309
1310 if (!((mode & Lexer.PREFORMATTED) != 0))
1311 {
1312 Node.trimSpaces(lexer, element);
1313 }
1314 Node.trimEmptyElement(lexer, element);
1315 return;
1316 }
1317
1318
1319 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1320 continue;
1321 }
1322 else if (lexer.exiled && node.tag.model != 0 && (node.tag.model & Dict.CM_TABLE) != 0)
1323 {
1324 lexer.ungetToken();
1325 Node.trimSpaces(lexer, element);
1326 Node.trimEmptyElement(lexer, element);
1327 return;
1328 }
1329 }
1330
1331
1332 if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
1333 {
1334 if (node.tag == element.tag)
1335 {
1336 lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1337 }
1338 else
1339 {
1340 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1341 lexer.ungetToken();
1342 }
1343 if (!((mode & Lexer.PREFORMATTED) != 0))
1344 {
1345 Node.trimSpaces(lexer, element);
1346 }
1347 Node.trimEmptyElement(lexer, element);
1348 return;
1349 }
1350
1351
1352
1353
1354
1355 if (node.tag == tt.tagA
1356 && !node.implicit
1357 && (element.tag == tt.tagA || element.isDescendantOf(tt.tagA)))
1358 {
1359
1360
1361
1362
1363 if (node.type != Node.END_TAG && node.attributes == null)
1364 {
1365 node.type = Node.END_TAG;
1366 lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1367
1368 lexer.ungetToken();
1369 continue;
1370 }
1371
1372 lexer.ungetToken();
1373 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1374
1375 if (!((mode & Lexer.PREFORMATTED) != 0))
1376 {
1377 Node.trimSpaces(lexer, element);
1378 }
1379 Node.trimEmptyElement(lexer, element);
1380 return;
1381 }
1382
1383 if ((element.tag.model & Dict.CM_HEADING) != 0)
1384 {
1385 if (node.tag == tt.tagCenter || node.tag == tt.tagDiv)
1386 {
1387 if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
1388 {
1389 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1390 continue;
1391 }
1392
1393 lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1394
1395
1396 if (element.content == null)
1397 {
1398 Node.insertNodeAsParent(element, node);
1399 continue;
1400 }
1401
1402
1403 element.insertNodeAfterElement(node);
1404
1405 if (!((mode & Lexer.PREFORMATTED) != 0))
1406 {
1407 Node.trimSpaces(lexer, element);
1408 }
1409
1410 element = lexer.cloneNode(element);
1411 element.start = lexer.lexsize;
1412 element.end = lexer.lexsize;
1413 node.insertNodeAtEnd(element);
1414 continue;
1415 }
1416
1417 if (node.tag == tt.tagHr)
1418 {
1419 if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
1420 {
1421 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1422 continue;
1423 }
1424
1425 lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1426
1427
1428 if (element.content == null)
1429 {
1430 Node.insertNodeBeforeElement(element, node);
1431 continue;
1432 }
1433
1434
1435 element.insertNodeAfterElement(node);
1436
1437 if (!((mode & Lexer.PREFORMATTED) != 0))
1438 {
1439 Node.trimSpaces(lexer, element);
1440 }
1441
1442 element = lexer.cloneNode(element);
1443 element.start = lexer.lexsize;
1444 element.end = lexer.lexsize;
1445 node.insertNodeAfterElement(element);
1446 continue;
1447 }
1448 }
1449
1450 if (element.tag == tt.tagDt)
1451 {
1452 if (node.tag == tt.tagHr)
1453 {
1454 Node dd;
1455
1456 if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
1457 {
1458 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1459 continue;
1460 }
1461
1462 lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1463 dd = lexer.inferredTag("dd");
1464
1465
1466 if (element.content == null)
1467 {
1468 Node.insertNodeBeforeElement(element, dd);
1469 dd.insertNodeAtEnd(node);
1470 continue;
1471 }
1472
1473
1474 element.insertNodeAfterElement(dd);
1475 dd.insertNodeAtEnd(node);
1476
1477 if (!((mode & Lexer.PREFORMATTED) != 0))
1478 {
1479 Node.trimSpaces(lexer, element);
1480 }
1481
1482 element = lexer.cloneNode(element);
1483 element.start = lexer.lexsize;
1484 element.end = lexer.lexsize;
1485 dd.insertNodeAfterElement(element);
1486 continue;
1487 }
1488 }
1489
1490
1491
1492 if (node.type == Node.END_TAG)
1493 {
1494 for (parent = element.parent; parent != null; parent = parent.parent)
1495 {
1496 if (node.tag == parent.tag)
1497 {
1498 if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
1499 {
1500 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1501 }
1502
1503 if (element.tag == tt.tagA)
1504 {
1505 lexer.popInline(element);
1506 }
1507
1508 lexer.ungetToken();
1509
1510 if (!((mode & Lexer.PREFORMATTED) != 0))
1511 {
1512 Node.trimSpaces(lexer, element);
1513 }
1514
1515 Node.trimEmptyElement(lexer, element);
1516 return;
1517 }
1518 }
1519 }
1520
1521
1522 if (!((node.tag.model & Dict.CM_INLINE) != 0))
1523 {
1524 if (node.type != Node.START_TAG)
1525 {
1526 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1527 continue;
1528 }
1529
1530 if (!((element.tag.model & Dict.CM_OPT) != 0))
1531 {
1532 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1533 }
1534
1535 if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
1536 {
1537 moveToHead(lexer, element, node);
1538 continue;
1539 }
1540
1541
1542
1543 if (element.tag == tt.tagA)
1544 {
1545 if (node.tag != null && !((node.tag.model & Dict.CM_HEADING) != 0))
1546 {
1547 lexer.popInline(element);
1548 }
1549 else if (!(element.content != null))
1550 {
1551 Node.discardElement(element);
1552 lexer.ungetToken();
1553 return;
1554 }
1555 }
1556
1557 lexer.ungetToken();
1558
1559 if (!((mode & Lexer.PREFORMATTED) != 0))
1560 {
1561 Node.trimSpaces(lexer, element);
1562 }
1563
1564 Node.trimEmptyElement(lexer, element);
1565 return;
1566 }
1567
1568
1569 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
1570 {
1571 if (node.implicit)
1572 {
1573 lexer.report.warning(lexer, element, node, Report.INSERTING_TAG);
1574 }
1575
1576
1577 if (node.tag == tt.tagBr)
1578 {
1579 Node.trimSpaces(lexer, element);
1580 }
1581
1582 element.insertNodeAtEnd(node);
1583 parseTag(lexer, node, mode);
1584 continue;
1585 }
1586
1587
1588 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1589 continue;
1590 }
1591
1592 if (!((element.tag.model & Dict.CM_OPT) != 0))
1593 {
1594 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
1595 }
1596
1597 Node.trimEmptyElement(lexer, element);
1598 }
1599 }
1600
1601 /***
1602 * Parser for LIST.
1603 */
1604 public static class ParseList implements Parser
1605 {
1606
1607 public void parse(Lexer lexer, Node list, short mode)
1608 {
1609 Node node;
1610 Node parent;
1611 TagTable tt = lexer.configuration.tt;
1612
1613 if ((list.tag.model & Dict.CM_EMPTY) != 0)
1614 {
1615 return;
1616 }
1617
1618 lexer.insert = -1;
1619
1620 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
1621 {
1622 if (node.tag == list.tag && node.type == Node.END_TAG)
1623 {
1624 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1625 {
1626 Node.coerceNode(lexer, list, tt.tagUl);
1627 }
1628
1629 list.closed = true;
1630 Node.trimEmptyElement(lexer, list);
1631 return;
1632 }
1633
1634
1635 if (Node.insertMisc(list, node))
1636 {
1637 continue;
1638 }
1639
1640 if (node.type != Node.TEXT_NODE && node.tag == null)
1641 {
1642 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1643 continue;
1644 }
1645
1646
1647
1648 if (node.type == Node.END_TAG)
1649 {
1650 if (node.tag == tt.tagForm)
1651 {
1652 badForm(lexer);
1653 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1654 continue;
1655 }
1656
1657 if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
1658 {
1659 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1660 lexer.popInline(node);
1661 continue;
1662 }
1663
1664 for (parent = list.parent; parent != null; parent = parent.parent)
1665 {
1666 if (node.tag == parent.tag)
1667 {
1668 lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1669 lexer.ungetToken();
1670
1671 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1672 {
1673 Node.coerceNode(lexer, list, tt.tagUl);
1674 }
1675
1676 Node.trimEmptyElement(lexer, list);
1677 return;
1678 }
1679 }
1680
1681 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1682 continue;
1683 }
1684
1685 if (node.tag != tt.tagLi)
1686 {
1687 lexer.ungetToken();
1688
1689 if (node.tag != null && (node.tag.model & Dict.CM_BLOCK) != 0 && lexer.excludeBlocks)
1690 {
1691 lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1692 Node.trimEmptyElement(lexer, list);
1693 return;
1694 }
1695
1696 node = lexer.inferredTag("li");
1697 node.addAttribute("style", "list-style: none");
1698 lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1699 }
1700
1701
1702 list.insertNodeAtEnd(node);
1703 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
1704 }
1705
1706 if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1707 {
1708 Node.coerceNode(lexer, list, tt.tagUl);
1709 }
1710
1711 lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1712 Node.trimEmptyElement(lexer, list);
1713 }
1714
1715 }
1716
1717 /***
1718 * Parser for empty elements.
1719 */
1720 public static class ParseEmpty implements Parser
1721 {
1722
1723 /***
1724 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1725 */
1726 public void parse(Lexer lexer, Node element, short mode)
1727 {
1728 if (lexer.isvoyager)
1729 {
1730 Node node = lexer.getToken(mode);
1731 if (node != null && !(node.type == Node.END_TAG && node.tag == element.tag))
1732 {
1733 lexer.report.warning(lexer, element, node, Report.ELEMENT_NOT_EMPTY);
1734 lexer.ungetToken();
1735 }
1736 }
1737 }
1738 }
1739
1740 /***
1741 * Parser for DEFLIST.
1742 */
1743 public static class ParseDefList implements Parser
1744 {
1745
1746 /***
1747 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1748 */
1749 public void parse(Lexer lexer, Node list, short mode)
1750 {
1751 Node node, parent;
1752 TagTable tt = lexer.configuration.tt;
1753
1754 if ((list.tag.model & Dict.CM_EMPTY) != 0)
1755 {
1756 return;
1757 }
1758
1759 lexer.insert = -1;
1760
1761 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
1762 {
1763 if (node.tag == list.tag && node.type == Node.END_TAG)
1764 {
1765 list.closed = true;
1766 Node.trimEmptyElement(lexer, list);
1767 return;
1768 }
1769
1770
1771 if (Node.insertMisc(list, node))
1772 {
1773 continue;
1774 }
1775
1776 if (node.type == Node.TEXT_NODE)
1777 {
1778 lexer.ungetToken();
1779 node = lexer.inferredTag("dt");
1780 lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1781 }
1782
1783 if (node.tag == null)
1784 {
1785 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1786 continue;
1787 }
1788
1789
1790
1791 if (node.type == Node.END_TAG)
1792 {
1793 if (node.tag == tt.tagForm)
1794 {
1795 badForm(lexer);
1796 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1797 continue;
1798 }
1799
1800 for (parent = list.parent; parent != null; parent = parent.parent)
1801 {
1802 if (node.tag == parent.tag)
1803 {
1804 lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1805
1806 lexer.ungetToken();
1807 Node.trimEmptyElement(lexer, list);
1808 return;
1809 }
1810 }
1811 }
1812
1813
1814 if (node.tag == tt.tagCenter)
1815 {
1816 if (list.content != null)
1817 {
1818 list.insertNodeAfterElement(node);
1819 }
1820 else
1821 {
1822
1823 Node.insertNodeBeforeElement(list, node);
1824
1825
1826 Node.discardElement(list);
1827 }
1828
1829
1830 parseTag(lexer, node, mode);
1831
1832
1833 list = lexer.inferredTag("dl");
1834 node.insertNodeAfterElement(list);
1835 continue;
1836 }
1837
1838 if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
1839 {
1840 lexer.ungetToken();
1841
1842 if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
1843 {
1844 lexer.report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
1845 Node.trimEmptyElement(lexer, list);
1846 return;
1847 }
1848
1849
1850 if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
1851 {
1852 Node.trimEmptyElement(lexer, list);
1853 return;
1854 }
1855
1856 node = lexer.inferredTag("dd");
1857 lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1858 }
1859
1860 if (node.type == Node.END_TAG)
1861 {
1862 lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1863 continue;
1864 }
1865
1866
1867 list.insertNodeAtEnd(node);
1868 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
1869 }
1870
1871 lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1872 Node.trimEmptyElement(lexer, list);
1873 }
1874
1875 }
1876
1877 /***
1878 * Parser for PRE.
1879 */
1880 public static class ParsePre implements Parser
1881 {
1882
1883 /***
1884 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1885 */
1886 public void parse(Lexer lexer, Node pre, short mode)
1887 {
1888 Node node;
1889 TagTable tt = lexer.configuration.tt;
1890
1891 if ((pre.tag.model & Dict.CM_EMPTY) != 0)
1892 {
1893 return;
1894 }
1895
1896 if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1897 {
1898 Node.coerceNode(lexer, pre, tt.tagPre);
1899 }
1900
1901 lexer.inlineDup(null);
1902
1903 while ((node = lexer.getToken(Lexer.PREFORMATTED)) != null)
1904 {
1905 if (node.tag == pre.tag && node.type == Node.END_TAG)
1906 {
1907 Node.trimSpaces(lexer, pre);
1908 pre.closed = true;
1909 Node.trimEmptyElement(lexer, pre);
1910 return;
1911 }
1912
1913 if (node.tag == tt.tagHtml)
1914 {
1915 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
1916 {
1917 lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1918 }
1919
1920 continue;
1921 }
1922
1923 if (node.type == Node.TEXT_NODE)
1924 {
1925
1926 if (pre.content == null)
1927 {
1928 if (node.textarray[node.start] == (byte) '\n')
1929 {
1930 ++node.start;
1931 }
1932
1933 if (node.start >= node.end)
1934 {
1935 continue;
1936 }
1937 }
1938
1939 pre.insertNodeAtEnd(node);
1940 continue;
1941 }
1942
1943
1944 if (Node.insertMisc(pre, node))
1945 {
1946 continue;
1947 }
1948
1949
1950 if (!lexer.preContent(node))
1951 {
1952 Node newnode;
1953
1954 lexer.report.warning(lexer, pre, node, Report.UNESCAPED_ELEMENT);
1955 newnode = Node.escapeTag(lexer, node);
1956 pre.insertNodeAtEnd(newnode);
1957 continue;
1958 }
1959
1960 if (node.tag == tt.tagP)
1961 {
1962 if (node.type == Node.START_TAG)
1963 {
1964 lexer.report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
1965
1966
1967 Node.trimSpaces(lexer, pre);
1968
1969
1970 Node.coerceNode(lexer, node, tt.tagBr);
1971 pre.insertNodeAtEnd(node);
1972 }
1973 else
1974 {
1975 lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1976 }
1977 continue;
1978 }
1979
1980 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
1981 {
1982
1983 if (node.tag == tt.tagBr)
1984 {
1985 Node.trimSpaces(lexer, pre);
1986 }
1987
1988 pre.insertNodeAtEnd(node);
1989 parseTag(lexer, node, Lexer.PREFORMATTED);
1990 continue;
1991 }
1992
1993
1994 lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1995 }
1996
1997 lexer.report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1998 Node.trimEmptyElement(lexer, pre);
1999 }
2000
2001 }
2002
2003 /***
2004 * Parser for block elements.
2005 */
2006 public static class ParseBlock implements Parser
2007 {
2008
2009 /***
2010 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2011 */
2012 public void parse(Lexer lexer, Node element, short mode)
2013 {
2014
2015
2016 Node node, parent;
2017 boolean checkstack;
2018 int istackbase = 0;
2019 TagTable tt = lexer.configuration.tt;
2020
2021 checkstack = true;
2022
2023 if ((element.tag.model & Dict.CM_EMPTY) != 0)
2024 {
2025 return;
2026 }
2027
2028 if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
2029 {
2030 lexer.report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
2031 }
2032
2033
2034
2035
2036
2037
2038 if ((element.tag.model & Dict.CM_OBJECT) != 0)
2039 {
2040 istackbase = lexer.istackbase;
2041 lexer.istackbase = lexer.istack.size();
2042 }
2043
2044 if (!((element.tag.model & Dict.CM_MIXED) != 0))
2045 {
2046 lexer.inlineDup(null);
2047 }
2048
2049 mode = Lexer.IGNORE_WHITESPACE;
2050
2051 while ((node = lexer.getToken(mode)) != null)
2052 {
2053
2054 if (node.type == Node.END_TAG
2055 && node.tag != null
2056 && (node.tag == element.tag || element.was == node.tag))
2057 {
2058
2059 if ((element.tag.model & Dict.CM_OBJECT) != 0)
2060 {
2061
2062 while (lexer.istack.size() > lexer.istackbase)
2063 {
2064 lexer.popInline(null);
2065 }
2066 lexer.istackbase = istackbase;
2067 }
2068
2069 element.closed = true;
2070 Node.trimSpaces(lexer, element);
2071 Node.trimEmptyElement(lexer, element);
2072 return;
2073 }
2074
2075 if (node.tag == tt.tagHtml || node.tag == tt.tagHead || node.tag == tt.tagBody)
2076 {
2077 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
2078 {
2079 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2080 }
2081
2082 continue;
2083 }
2084
2085 if (node.type == Node.END_TAG)
2086 {
2087 if (node.tag == null)
2088 {
2089 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2090
2091 continue;
2092 }
2093 else if (node.tag == tt.tagBr)
2094 {
2095 node.type = Node.START_TAG;
2096 }
2097 else if (node.tag == tt.tagP)
2098 {
2099 Node.coerceNode(lexer, node, tt.tagBr);
2100 element.insertNodeAtEnd(node);
2101 node = lexer.inferredTag("br");
2102 }
2103 else
2104 {
2105
2106
2107 for (parent = element.parent; parent != null; parent = parent.parent)
2108 {
2109 if (node.tag == parent.tag)
2110 {
2111 if (!((element.tag.model & Dict.CM_OPT) != 0))
2112 {
2113 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
2114 }
2115
2116 lexer.ungetToken();
2117
2118 if ((element.tag.model & Dict.CM_OBJECT) != 0)
2119 {
2120
2121 while (lexer.istack.size() > lexer.istackbase)
2122 {
2123 lexer.popInline(null);
2124 }
2125 lexer.istackbase = istackbase;
2126 }
2127
2128 Node.trimSpaces(lexer, element);
2129 Node.trimEmptyElement(lexer, element);
2130 return;
2131 }
2132 }
2133
2134 if (lexer.exiled && node.tag.model != 0 && (node.tag.model & Dict.CM_TABLE) != 0)
2135 {
2136 lexer.ungetToken();
2137 Node.trimSpaces(lexer, element);
2138 Node.trimEmptyElement(lexer, element);
2139 return;
2140 }
2141 }
2142 }
2143
2144
2145 if (node.type == Node.TEXT_NODE)
2146 {
2147 boolean iswhitenode = false;
2148
2149 if (node.type == Node.TEXT_NODE
2150 && node.end <= node.start + 1
2151 && lexer.lexbuf[node.start] == (byte) ' ')
2152 {
2153 iswhitenode = true;
2154 }
2155
2156 if (lexer.configuration.encloseBlockText && !iswhitenode)
2157 {
2158 lexer.ungetToken();
2159 node = lexer.inferredTag("p");
2160 element.insertNodeAtEnd(node);
2161 parseTag(lexer, node, Lexer.MIXED_CONTENT);
2162 continue;
2163 }
2164
2165 if (checkstack)
2166 {
2167 checkstack = false;
2168
2169 if (!((element.tag.model & Dict.CM_MIXED) != 0))
2170 {
2171 if (lexer.inlineDup(node) > 0)
2172 {
2173 continue;
2174 }
2175 }
2176 }
2177
2178 element.insertNodeAtEnd(node);
2179 mode = Lexer.MIXED_CONTENT;
2180
2181
2182
2183 if (element.tag == tt.tagBody
2184 || element.tag == tt.tagMap
2185 || element.tag == tt.tagBlockquote
2186 || element.tag == tt.tagForm
2187 || element.tag == tt.tagNoscript)
2188 {
2189 lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
2190 }
2191 continue;
2192 }
2193
2194 if (Node.insertMisc(element, node))
2195 {
2196 continue;
2197 }
2198
2199
2200 if (node.tag == tt.tagParam)
2201 {
2202 if (((element.tag.model & Dict.CM_PARAM) != 0)
2203 && (node.type == Node.START_TAG || node.type == Node.START_END_TAG))
2204 {
2205 element.insertNodeAtEnd(node);
2206 continue;
2207 }
2208
2209
2210 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2211 continue;
2212 }
2213
2214
2215 if (node.tag == tt.tagArea)
2216 {
2217 if ((element.tag == tt.tagMap) && (node.type == Node.START_TAG || node.type == Node.START_END_TAG))
2218 {
2219 element.insertNodeAtEnd(node);
2220 continue;
2221 }
2222
2223
2224 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2225 continue;
2226 }
2227
2228
2229 if (node.tag == null)
2230 {
2231 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2232 continue;
2233 }
2234
2235
2236
2237
2238 if (!((node.tag.model & Dict.CM_INLINE) != 0))
2239 {
2240 if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
2241 {
2242 if (node.tag == tt.tagForm)
2243 {
2244 badForm(lexer);
2245 }
2246 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2247 continue;
2248 }
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260 if (element.tag == tt.tagLi)
2261 {
2262 if (node.tag == tt.tagFrame
2263 || node.tag == tt.tagFrameset
2264 || node.tag == tt.tagOptgroup
2265 || node.tag == tt.tagOption)
2266 {
2267 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2268 continue;
2269 }
2270 }
2271
2272 if (element.tag == tt.tagTd || element.tag == tt.tagTh)
2273 {
2274
2275
2276 if ((node.tag.model & Dict.CM_HEAD) != 0)
2277 {
2278 moveToHead(lexer, element, node);
2279 continue;
2280 }
2281
2282 if ((node.tag.model & Dict.CM_LIST) != 0)
2283 {
2284 lexer.ungetToken();
2285 node = lexer.inferredTag("ul");
2286 node.addClass("noindent");
2287 lexer.excludeBlocks = true;
2288 }
2289 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
2290 {
2291 lexer.ungetToken();
2292 node = lexer.inferredTag("dl");
2293 lexer.excludeBlocks = true;
2294 }
2295
2296
2297 if (!((node.tag.model & Dict.CM_BLOCK) != 0))
2298 {
2299 lexer.ungetToken();
2300 Node.trimSpaces(lexer, element);
2301 Node.trimEmptyElement(lexer, element);
2302 return;
2303 }
2304 }
2305 else if ((node.tag.model & Dict.CM_BLOCK) != 0)
2306 {
2307 if (lexer.excludeBlocks)
2308 {
2309 if (!((element.tag.model & Dict.CM_OPT) != 0))
2310 {
2311 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
2312 }
2313
2314 lexer.ungetToken();
2315
2316 if ((element.tag.model & Dict.CM_OBJECT) != 0)
2317 {
2318 lexer.istackbase = istackbase;
2319 }
2320
2321 Node.trimSpaces(lexer, element);
2322 Node.trimEmptyElement(lexer, element);
2323 return;
2324 }
2325 }
2326 else
2327 {
2328
2329
2330 if ((node.tag.model & Dict.CM_HEAD) != 0)
2331 {
2332 moveToHead(lexer, element, node);
2333 continue;
2334 }
2335
2336
2337 if (element.tag == tt.tagForm && element.parent.tag == tt.tagTd && element.parent.implicit)
2338 {
2339 if (node.tag == tt.tagTd)
2340 {
2341 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2342 continue;
2343 }
2344
2345 if (node.tag == tt.tagTh)
2346 {
2347 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2348 node = element.parent;
2349 node.element = "th";
2350 node.tag = tt.tagTh;
2351 continue;
2352 }
2353 }
2354
2355 if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
2356 {
2357 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
2358 }
2359
2360 lexer.ungetToken();
2361
2362 if ((node.tag.model & Dict.CM_LIST) != 0)
2363 {
2364 if (element.parent != null
2365 && element.parent.tag != null
2366 && element.parent.tag.getParser() == LIST)
2367 {
2368 Node.trimSpaces(lexer, element);
2369 Node.trimEmptyElement(lexer, element);
2370 return;
2371 }
2372
2373 node = lexer.inferredTag("ul");
2374 node.addClass("noindent");
2375 }
2376 else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
2377 {
2378 if (element.parent.tag == tt.tagDl)
2379 {
2380 Node.trimSpaces(lexer, element);
2381 Node.trimEmptyElement(lexer, element);
2382 return;
2383 }
2384
2385 node = lexer.inferredTag("dl");
2386 }
2387 else if ((node.tag.model & Dict.CM_TABLE) != 0 || (node.tag.model & Dict.CM_ROW) != 0)
2388 {
2389 node = lexer.inferredTag("table");
2390 }
2391 else if ((element.tag.model & Dict.CM_OBJECT) != 0)
2392 {
2393
2394 while (lexer.istack.size() > lexer.istackbase)
2395 {
2396 lexer.popInline(null);
2397 }
2398 lexer.istackbase = istackbase;
2399 Node.trimSpaces(lexer, element);
2400 Node.trimEmptyElement(lexer, element);
2401 return;
2402
2403 }
2404 else
2405 {
2406 Node.trimSpaces(lexer, element);
2407 Node.trimEmptyElement(lexer, element);
2408 return;
2409 }
2410 }
2411 }
2412
2413
2414 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
2415 {
2416 if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
2417 {
2418
2419 if (lexer.configuration.encloseBlockText)
2420 {
2421 lexer.ungetToken();
2422 node = lexer.inferredTag("p");
2423 element.insertNodeAtEnd(node);
2424 parseTag(lexer, node, Lexer.MIXED_CONTENT);
2425 continue;
2426 }
2427
2428 if (checkstack && !node.implicit)
2429 {
2430 checkstack = false;
2431
2432
2433 if (!TidyUtils.toBoolean(element.tag.model & Dict.CM_MIXED))
2434 {
2435 if (lexer.inlineDup(node) > 0)
2436 {
2437 continue;
2438 }
2439 }
2440 }
2441
2442 mode = Lexer.MIXED_CONTENT;
2443 }
2444 else
2445 {
2446 checkstack = true;
2447 mode = Lexer.IGNORE_WHITESPACE;
2448 }
2449
2450
2451 if (node.tag == tt.tagBr)
2452 {
2453 Node.trimSpaces(lexer, element);
2454 }
2455
2456 element.insertNodeAtEnd(node);
2457
2458 if (node.implicit)
2459 {
2460 lexer.report.warning(lexer, element, node, Report.INSERTING_TAG);
2461 }
2462
2463 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE
2464 );
2465 continue;
2466 }
2467
2468
2469 if (node.type == Node.END_TAG)
2470 {
2471 lexer.popInline(node);
2472 }
2473
2474 lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2475 continue;
2476 }
2477
2478 if (!((element.tag.model & Dict.CM_OPT) != 0))
2479 {
2480 lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
2481 }
2482
2483 if ((element.tag.model & Dict.CM_OBJECT) != 0)
2484 {
2485
2486 while (lexer.istack.size() > lexer.istackbase)
2487 {
2488 lexer.popInline(null);
2489 }
2490 lexer.istackbase = istackbase;
2491 }
2492
2493 Node.trimSpaces(lexer, element);
2494 Node.trimEmptyElement(lexer, element);
2495 }
2496
2497 }
2498
2499 /***
2500 * Parser for TABLE.
2501 */
2502 public static class ParseTableTag implements Parser
2503 {
2504
2505 /***
2506 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2507 */
2508 public void parse(Lexer lexer, Node table, short mode)
2509 {
2510 Node node, parent;
2511 int istackbase;
2512 TagTable tt = lexer.configuration.tt;
2513
2514 lexer.deferDup();
2515 istackbase = lexer.istackbase;
2516 lexer.istackbase = lexer.istack.size();
2517
2518 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2519 {
2520 if (node.tag == table.tag && node.type == Node.END_TAG)
2521 {
2522 lexer.istackbase = istackbase;
2523 table.closed = true;
2524 Node.trimEmptyElement(lexer, table);
2525 return;
2526 }
2527
2528
2529 if (Node.insertMisc(table, node))
2530 {
2531 continue;
2532 }
2533
2534
2535 if (node.tag == null && node.type != Node.TEXT_NODE)
2536 {
2537 lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2538 continue;
2539 }
2540
2541
2542
2543 if (node.type != Node.END_TAG)
2544 {
2545 if (node.tag == tt.tagTd || node.tag == tt.tagTh || node.tag == tt.tagTable)
2546 {
2547 lexer.ungetToken();
2548 node = lexer.inferredTag("tr");
2549 lexer.report.warning(lexer, table, node, Report.MISSING_STARTTAG);
2550 }
2551 else if (node.type == Node.TEXT_NODE || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2552 {
2553 Node.insertNodeBeforeElement(table, node);
2554 lexer.report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2555 lexer.exiled = true;
2556
2557 if (!(node.type == Node.TEXT_NODE))
2558 {
2559 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2560 }
2561
2562 lexer.exiled = false;
2563 continue;
2564 }
2565 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2566 {
2567 moveToHead(lexer, table, node);
2568 continue;
2569 }
2570 }
2571
2572
2573
2574 if (node.type == Node.END_TAG)
2575 {
2576 if (node.tag == tt.tagForm
2577 || (node.tag != null && ((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)))
2578 {
2579 badForm(lexer);
2580 lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2581 continue;
2582 }
2583
2584 if ((node.tag != null && (node.tag.model & (Dict.CM_TABLE | Dict.CM_ROW)) != 0)
2585 || (node.tag != null && (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
2586 {
2587 lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2588 continue;
2589 }
2590
2591 for (parent = table.parent; parent != null; parent = parent.parent)
2592 {
2593 if (node.tag == parent.tag)
2594 {
2595 lexer.report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
2596 lexer.ungetToken();
2597 lexer.istackbase = istackbase;
2598 Node.trimEmptyElement(lexer, table);
2599 return;
2600 }
2601 }
2602 }
2603
2604 if (!((node.tag.model & Dict.CM_TABLE) != 0))
2605 {
2606 lexer.ungetToken();
2607 lexer.report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2608 lexer.istackbase = istackbase;
2609 Node.trimEmptyElement(lexer, table);
2610 return;
2611 }
2612
2613 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
2614 {
2615 table.insertNodeAtEnd(node);
2616
2617 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2618 continue;
2619 }
2620
2621
2622 lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2623 }
2624
2625 lexer.report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
2626 Node.trimEmptyElement(lexer, table);
2627 lexer.istackbase = istackbase;
2628 }
2629
2630 }
2631
2632 /***
2633 * Parser for COLGROUP.
2634 */
2635 public static class ParseColGroup implements Parser
2636 {
2637
2638 /***
2639 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2640 */
2641 public void parse(Lexer lexer, Node colgroup, short mode)
2642 {
2643 Node node, parent;
2644 TagTable tt = lexer.configuration.tt;
2645
2646 if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2647 {
2648 return;
2649 }
2650
2651 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2652 {
2653 if (node.tag == colgroup.tag && node.type == Node.END_TAG)
2654 {
2655 colgroup.closed = true;
2656 return;
2657 }
2658
2659
2660
2661 if (node.type == Node.END_TAG)
2662 {
2663 if (node.tag == tt.tagForm)
2664 {
2665 badForm(lexer);
2666 lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2667 continue;
2668 }
2669
2670 for (parent = colgroup.parent; parent != null; parent = parent.parent)
2671 {
2672
2673 if (node.tag == parent.tag)
2674 {
2675 lexer.ungetToken();
2676 return;
2677 }
2678 }
2679 }
2680
2681 if (node.type == Node.TEXT_NODE)
2682 {
2683 lexer.ungetToken();
2684 return;
2685 }
2686
2687
2688 if (Node.insertMisc(colgroup, node))
2689 {
2690 continue;
2691 }
2692
2693
2694 if (node.tag == null)
2695 {
2696 lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2697 continue;
2698 }
2699
2700 if (node.tag != tt.tagCol)
2701 {
2702 lexer.ungetToken();
2703 return;
2704 }
2705
2706 if (node.type == Node.END_TAG)
2707 {
2708 lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2709 continue;
2710 }
2711
2712
2713 colgroup.insertNodeAtEnd(node);
2714 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2715 }
2716 }
2717
2718 }
2719
2720 /***
2721 * Parser for ROWGROUP.
2722 */
2723 public static class ParseRowGroup implements Parser
2724 {
2725
2726 /***
2727 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2728 */
2729 public void parse(Lexer lexer, Node rowgroup, short mode)
2730 {
2731 Node node, parent;
2732 TagTable tt = lexer.configuration.tt;
2733
2734 if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2735 {
2736 return;
2737 }
2738
2739 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2740 {
2741 if (node.tag == rowgroup.tag)
2742 {
2743 if (node.type == Node.END_TAG)
2744 {
2745 rowgroup.closed = true;
2746 Node.trimEmptyElement(lexer, rowgroup);
2747 return;
2748 }
2749
2750 lexer.ungetToken();
2751 return;
2752 }
2753
2754
2755 if (node.tag == tt.tagTable && node.type == Node.END_TAG)
2756 {
2757 lexer.ungetToken();
2758 Node.trimEmptyElement(lexer, rowgroup);
2759 return;
2760 }
2761
2762
2763 if (Node.insertMisc(rowgroup, node))
2764 {
2765 continue;
2766 }
2767
2768
2769 if (node.tag == null && node.type != Node.TEXT_NODE)
2770 {
2771 lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2772 continue;
2773 }
2774
2775
2776
2777
2778 if (node.type != Node.END_TAG)
2779 {
2780 if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2781 {
2782 lexer.ungetToken();
2783 node = lexer.inferredTag("tr");
2784 lexer.report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2785 }
2786 else if (node.type == Node.TEXT_NODE || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2787 {
2788 Node.moveBeforeTable(rowgroup, node, tt);
2789 lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2790 lexer.exiled = true;
2791
2792
2793 if (node.type != Node.TEXT_NODE)
2794 {
2795 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2796 }
2797
2798 lexer.exiled = false;
2799 continue;
2800 }
2801 else if ((node.tag.model & Dict.CM_HEAD) != 0)
2802 {
2803 lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2804 moveToHead(lexer, rowgroup, node);
2805 continue;
2806 }
2807 }
2808
2809
2810
2811 if (node.type == Node.END_TAG)
2812 {
2813
2814 if (node.tag == tt.tagForm
2815 || (node.tag != null && (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
2816 {
2817 if (node.tag == tt.tagForm)
2818 {
2819 badForm(lexer);
2820 }
2821 lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2822 continue;
2823 }
2824
2825 if (node.tag == tt.tagTr || node.tag == tt.tagTd || node.tag == tt.tagTh)
2826 {
2827 lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2828 continue;
2829 }
2830
2831 for (parent = rowgroup.parent; parent != null; parent = parent.parent)
2832 {
2833 if (node.tag == parent.tag)
2834 {
2835 lexer.ungetToken();
2836 Node.trimEmptyElement(lexer, rowgroup);
2837 return;
2838 }
2839 }
2840
2841 }
2842
2843
2844
2845 if ((node.tag.model & Dict.CM_ROWGRP) != 0)
2846 {
2847 if (node.type != Node.END_TAG)
2848 {
2849 lexer.ungetToken();
2850 }
2851
2852 Node.trimEmptyElement(lexer, rowgroup);
2853 return;
2854 }
2855
2856 if (node.type == Node.END_TAG)
2857 {
2858 lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2859 continue;
2860 }
2861
2862 if (!(node.tag == tt.tagTr))
2863 {
2864 node = lexer.inferredTag("tr");
2865 lexer.report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2866 lexer.ungetToken();
2867 }
2868
2869
2870 rowgroup.insertNodeAtEnd(node);
2871 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2872 }
2873 Node.trimEmptyElement(lexer, rowgroup);
2874 }
2875 }
2876
2877 /***
2878 * Parser for ROW.
2879 */
2880 public static class ParseRow implements Parser
2881 {
2882
2883 /***
2884 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2885 */
2886 public void parse(Lexer lexer, Node row, short mode)
2887 {
2888 Node node, parent;
2889 boolean excludeState;
2890 TagTable tt = lexer.configuration.tt;
2891
2892 if ((row.tag.model & Dict.CM_EMPTY) != 0)
2893 {
2894 return;
2895 }
2896
2897 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2898 {
2899 if (node.tag == row.tag)
2900 {
2901 if (node.type == Node.END_TAG)
2902 {
2903 row.closed = true;
2904 Node.fixEmptyRow(lexer, row);
2905 return;
2906 }
2907
2908 lexer.ungetToken();
2909 Node.fixEmptyRow(lexer, row);
2910 return;
2911 }
2912
2913
2914 if (node.type == Node.END_TAG)
2915 {
2916 if (node.tag == tt.tagForm
2917 || (node.tag != null && (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
2918 {
2919 if (node.tag == tt.tagForm)
2920 {
2921 badForm(lexer);
2922 }
2923 lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2924 continue;
2925 }
2926
2927 if (node.tag == tt.tagTd || node.tag == tt.tagTh)
2928 {
2929 lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2930 continue;
2931 }
2932
2933 for (parent = row.parent; parent != null; parent = parent.parent)
2934 {
2935 if (node.tag == parent.tag)
2936 {
2937 lexer.ungetToken();
2938 Node.trimEmptyElement(lexer, row);
2939 return;
2940 }
2941 }
2942 }
2943
2944
2945 if (Node.insertMisc(row, node))
2946 {
2947 continue;
2948 }
2949
2950
2951 if (node.tag == null && node.type != Node.TEXT_NODE)
2952 {
2953 lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2954 continue;
2955 }
2956
2957
2958 if (node.tag == tt.tagTable)
2959 {
2960 lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2961 continue;
2962 }
2963
2964
2965 if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
2966 {
2967 lexer.ungetToken();
2968 Node.trimEmptyElement(lexer, row);
2969 return;
2970 }
2971
2972 if (node.type == Node.END_TAG)
2973 {
2974 lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2975 continue;
2976 }
2977
2978
2979
2980 if (node.type != Node.END_TAG)
2981 {
2982 if (node.tag == tt.tagForm)
2983 {
2984 lexer.ungetToken();
2985 node = lexer.inferredTag("td");
2986 lexer.report.warning(lexer, row, node, Report.MISSING_STARTTAG);
2987 }
2988 else if (node.type == Node.TEXT_NODE || (node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
2989 {
2990 Node.moveBeforeTable(row, node, tt);
2991 lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2992 lexer.exiled = true;
2993
2994 if (node.type != Node.TEXT_NODE)
2995 {
2996 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2997 }
2998
2999 lexer.exiled = false;
3000 continue;
3001 }
3002 else if ((node.tag.model & Dict.CM_HEAD) != 0)
3003 {
3004 lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
3005 moveToHead(lexer, row, node);
3006 continue;
3007 }
3008 }
3009
3010 if (!(node.tag == tt.tagTd || node.tag == tt.tagTh))
3011 {
3012 lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
3013 continue;
3014 }
3015
3016
3017 row.insertNodeAtEnd(node);
3018 excludeState = lexer.excludeBlocks;
3019 lexer.excludeBlocks = false;
3020 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
3021 lexer.excludeBlocks = excludeState;
3022
3023
3024
3025 while (lexer.istack.size() > lexer.istackbase)
3026 {
3027 lexer.popInline(null);
3028 }
3029 }
3030
3031 Node.trimEmptyElement(lexer, row);
3032 }
3033
3034 }
3035
3036 /***
3037 * Parser for NOFRAMES.
3038 */
3039 public static class ParseNoFrames implements Parser
3040 {
3041
3042 /***
3043 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3044 */
3045 public void parse(Lexer lexer, Node noframes, short mode)
3046 {
3047 Node node;
3048 TagTable tt = lexer.configuration.tt;
3049
3050 lexer.badAccess |= Report.USING_NOFRAMES;
3051 mode = Lexer.IGNORE_WHITESPACE;
3052
3053 while ((node = lexer.getToken(mode)) != null)
3054 {
3055 if (node.tag == noframes.tag && node.type == Node.END_TAG)
3056 {
3057 noframes.closed = true;
3058 Node.trimSpaces(lexer, noframes);
3059 return;
3060 }
3061
3062 if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
3063 {
3064
3065 Node.trimSpaces(lexer, noframes);
3066
3067
3068 if (node.type == Node.END_TAG)
3069 {
3070 lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
3071
3072 }
3073 else
3074 {
3075 lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
3076
3077 lexer.ungetToken();
3078 }
3079 return;
3080 }
3081
3082 if (node.tag == tt.tagHtml)
3083 {
3084 if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
3085 {
3086 lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
3087 }
3088
3089 continue;
3090 }
3091
3092
3093 if (Node.insertMisc(noframes, node))
3094 {
3095 continue;
3096 }
3097
3098 if (node.tag == tt.tagBody && node.type == Node.START_TAG)
3099 {
3100 boolean seenbody = lexer.seenEndBody;
3101 noframes.insertNodeAtEnd(node);
3102 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
3103
3104 if (seenbody)
3105 {
3106 Node.coerceNode(lexer, node, tt.tagDiv);
3107 moveNodeToBody(lexer, node);
3108 }
3109 continue;
3110 }
3111
3112
3113 if (node.type == Node.TEXT_NODE || (node.tag != null && node.type != Node.END_TAG))
3114 {
3115 if (lexer.seenEndBody)
3116 {
3117 Node body = lexer.root.findBody(tt);
3118
3119 if (node.type == Node.TEXT_NODE)
3120 {
3121 lexer.ungetToken();
3122 node = lexer.inferredTag("p");
3123 lexer.report.warning(lexer, noframes, node, Report.CONTENT_AFTER_BODY);
3124 }
3125
3126 body.insertNodeAtEnd(node);
3127 }
3128 else
3129 {
3130 lexer.ungetToken();
3131 node = lexer.inferredTag("body");
3132 if (lexer.configuration.xmlOut)
3133 {
3134 lexer.report.warning(lexer, noframes, node, Report.INSERTING_TAG);
3135 }
3136 noframes.insertNodeAtEnd(node);
3137 }
3138 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
3139
3140 continue;
3141 }
3142
3143 lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
3144 }
3145
3146 lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
3147 }
3148
3149 }
3150
3151 /***
3152 * Parser for SELECT.
3153 */
3154 public static class ParseSelect implements Parser
3155 {
3156
3157 /***
3158 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3159 */
3160 public void parse(Lexer lexer, Node field, short mode)
3161 {
3162 Node node;
3163 TagTable tt = lexer.configuration.tt;
3164
3165 lexer.insert = -1;
3166
3167 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3168 {
3169 if (node.tag == field.tag && node.type == Node.END_TAG)
3170 {
3171 field.closed = true;
3172 Node.trimSpaces(lexer, field);
3173 return;
3174 }
3175
3176
3177 if (Node.insertMisc(field, node))
3178 {
3179 continue;
3180 }
3181
3182 if (node.type == Node.START_TAG
3183 && (node.tag == tt.tagOption || node.tag == tt.tagOptgroup || node.tag == tt.tagScript))
3184 {
3185 field.insertNodeAtEnd(node);
3186 parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
3187 continue;
3188 }
3189
3190
3191 lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
3192 }
3193
3194 lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
3195 }
3196
3197 }
3198
3199 /***
3200 * Parser for text nodes.
3201 */
3202 public static class ParseText implements Parser
3203 {
3204
3205 /***
3206 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3207 */
3208 public void parse(Lexer lexer, Node field, short mode)
3209 {
3210 Node node;
3211 TagTable tt = lexer.configuration.tt;
3212
3213 lexer.insert = -1;
3214
3215 if (field.tag == tt.tagTextarea)
3216 {
3217 mode = Lexer.PREFORMATTED;
3218 }
3219 else
3220 {
3221 mode = Lexer.MIXED_CONTENT;
3222 }
3223
3224 while ((node = lexer.getToken(mode)) != null)
3225 {
3226 if (node.tag == field.tag && node.type == Node.END_TAG)
3227 {
3228 field.closed = true;
3229 Node.trimSpaces(lexer, field);
3230 return;
3231 }
3232
3233
3234 if (Node.insertMisc(field, node))
3235 {
3236 continue;
3237 }
3238
3239 if (node.type == Node.TEXT_NODE)
3240 {
3241
3242 if (field.content == null && !((mode & Lexer.PREFORMATTED) != 0))
3243 {
3244 Node.trimSpaces(lexer, field);
3245 }
3246
3247 if (node.start >= node.end)
3248 {
3249 continue;
3250 }
3251
3252 field.insertNodeAtEnd(node);
3253 continue;
3254 }
3255
3256
3257
3258 if (node.tag != null
3259 && ((node.tag.model & Dict.CM_INLINE) != 0)
3260 && (node.tag.model & Dict.CM_FIELD) == 0)
3261 {
3262 lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
3263 continue;
3264 }
3265
3266
3267 if (!((field.tag.model & Dict.CM_OPT) != 0))
3268 {
3269 lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
3270 }
3271
3272 lexer.ungetToken();
3273 Node.trimSpaces(lexer, field);
3274 return;
3275 }
3276
3277 if (!((field.tag.model & Dict.CM_OPT) != 0))
3278 {
3279 lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
3280 }
3281 }
3282
3283 }
3284
3285 /***
3286 * Parser for OPTGROUP.
3287 */
3288 public static class ParseOptGroup implements Parser
3289 {
3290
3291 /***
3292 * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3293 */
3294 public void parse(Lexer lexer, Node field, short mode)
3295 {
3296 Node node;
3297 TagTable tt = lexer.configuration.tt;
3298
3299 lexer.insert = -1;
3300
3301 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3302 {
3303 if (node.tag == field.tag && node.type == Node.END_TAG)
3304 {
3305 field.closed = true;
3306 Node.trimSpaces(lexer, field);
3307 return;
3308 }
3309
3310
3311 if (Node.insertMisc(field, node))
3312 {
3313 continue;
3314 }
3315
3316 if (node.type == Node.START_TAG && (node.tag == tt.tagOption || node.tag == tt.tagOptgroup))
3317 {
3318 if (node.tag == tt.tagOptgroup)
3319 {
3320 lexer.report.warning(lexer, field, node, Report.CANT_BE_NESTED);
3321 }
3322
3323 field.insertNodeAtEnd(node);
3324 parseTag(lexer, node, Lexer.MIXED_CONTENT);
3325 continue;
3326 }
3327
3328
3329 lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
3330 }
3331 }
3332
3333 }
3334
3335 /***
3336 * HTML is the top level element.
3337 */
3338 public static Node parseDocument(Lexer lexer)
3339 {
3340 Node node, document, html;
3341 Node doctype = null;
3342 TagTable tt = lexer.configuration.tt;
3343
3344 document = lexer.newNode();
3345 document.type = Node.ROOT_NODE;
3346
3347 lexer.root = document;
3348
3349 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3350 {
3351
3352 if (Node.insertMisc(document, node))
3353 {
3354 continue;
3355 }
3356
3357 if (node.type == Node.DOCTYPE_TAG)
3358 {
3359 if (doctype == null)
3360 {
3361 document.insertNodeAtEnd(node);
3362 doctype = node;
3363 }
3364 else
3365 {
3366 lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
3367 }
3368 continue;
3369 }
3370
3371 if (node.type == Node.END_TAG)
3372 {
3373 lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
3374 continue;
3375 }
3376
3377 if (node.type != Node.START_TAG || node.tag != tt.tagHtml)
3378 {
3379 lexer.ungetToken();
3380 html = lexer.inferredTag("html");
3381 }
3382 else
3383 {
3384 html = node;
3385 }
3386
3387 if (document.findDocType() == null && !lexer.configuration.bodyOnly)
3388 {
3389 lexer.report.warning(lexer, null, null, Report.MISSING_DOCTYPE);
3390 }
3391
3392 document.insertNodeAtEnd(html);
3393 HTML.parse(lexer, html, (short) 0);
3394 break;
3395 }
3396
3397 return document;
3398 }
3399
3400 /***
3401 * Indicates whether or not whitespace should be preserved for this element. If an <code>xml:space</code>
3402 * attribute is found, then if the attribute value is <code>preserve</code>, returns <code>true</code>. For
3403 * any other value, returns <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em>
3404 * found, then the following element names result in a return value of <code>true:
3405 * pre, script, style,</code> and
3406 * <code>xsl:text</code>. Finally, if a <code>TagTable</code> was passed in and the element appears as the
3407 * "pre" element in the <code>TagTable</code>, then <code>true</code> will be returned. Otherwise,
3408 * <code>false</code> is returned.
3409 * @param element The <code>Node</code> to test to see if whitespace should be preserved.
3410 * @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code> function. This may be
3411 * <code>null</code>, in which case this test is bypassed.
3412 * @return <code>true</code> or <code>false</code>, as explained above.
3413 */
3414 public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
3415 {
3416 AttVal attribute;
3417
3418
3419 for (attribute = element.attributes; attribute != null; attribute = attribute.next)
3420 {
3421 if (attribute.attribute.equals("xml:space"))
3422 {
3423 if (attribute.value.equals("preserve"))
3424 {
3425 return true;
3426 }
3427
3428 return false;
3429 }
3430 }
3431
3432 if (element.element == null)
3433 {
3434 return false;
3435 }
3436
3437
3438 if ("pre".equalsIgnoreCase(element.element)
3439 || "script".equalsIgnoreCase(element.element)
3440 || "style".equalsIgnoreCase(element.element))
3441 {
3442 return true;
3443 }
3444
3445 if ((tt != null) && (tt.findParser(element) == PRE))
3446 {
3447 return true;
3448 }
3449
3450
3451 if ("xsl:text".equalsIgnoreCase(element.element))
3452 {
3453 return true;
3454 }
3455
3456 return false;
3457 }
3458
3459 /***
3460 * XML documents.
3461 */
3462 public static void parseXMLElement(Lexer lexer, Node element, short mode)
3463 {
3464 Node node;
3465
3466
3467
3468 if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
3469 {
3470 mode = Lexer.PREFORMATTED;
3471 }
3472
3473 while ((node = lexer.getToken(mode)) != null)
3474 {
3475 if (node.type == Node.END_TAG && node.element.equals(element.element))
3476 {
3477 element.closed = true;
3478 break;
3479 }
3480
3481
3482 if (node.type == Node.END_TAG)
3483 {
3484 lexer.report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
3485 continue;
3486 }
3487
3488
3489 if (node.type == Node.START_TAG)
3490 {
3491 parseXMLElement(lexer, node, mode);
3492 }
3493
3494 element.insertNodeAtEnd(node);
3495 }
3496
3497
3498
3499 node = element.content;
3500
3501 if (node != null && node.type == Node.TEXT_NODE && mode != Lexer.PREFORMATTED)
3502 {
3503 if (node.textarray[node.start] == (byte) ' ')
3504 {
3505 node.start++;
3506
3507 if (node.start >= node.end)
3508 {
3509 Node.discardElement(node);
3510 }
3511 }
3512 }
3513
3514
3515
3516 node = element.last;
3517
3518 if (node != null && node.type == Node.TEXT_NODE && mode != Lexer.PREFORMATTED)
3519 {
3520 if (node.textarray[node.end - 1] == (byte) ' ')
3521 {
3522 node.end--;
3523
3524 if (node.start >= node.end)
3525 {
3526 Node.discardElement(node);
3527 }
3528 }
3529 }
3530 }
3531
3532 public static Node parseXMLDocument(Lexer lexer)
3533 {
3534 Node node, document, doctype;
3535
3536 document = lexer.newNode();
3537 document.type = Node.ROOT_NODE;
3538 doctype = null;
3539 lexer.configuration.xmlTags = true;
3540
3541 while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3542 {
3543
3544 if (node.type == Node.END_TAG)
3545 {
3546 lexer.report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
3547 continue;
3548 }
3549
3550
3551 if (Node.insertMisc(document, node))
3552 {
3553 continue;
3554 }
3555
3556 if (node.type == Node.DOCTYPE_TAG)
3557 {
3558 if (doctype == null)
3559 {
3560 document.insertNodeAtEnd(node);
3561 doctype = node;
3562 }
3563 else
3564 {
3565 lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
3566 }
3567 continue;
3568 }
3569
3570 if (node.type == Node.START_END_TAG)
3571 {
3572 document.insertNodeAtEnd(node);
3573 continue;
3574 }
3575
3576
3577 if (node.type == Node.START_TAG)
3578 {
3579 document.insertNodeAtEnd(node);
3580 parseXMLElement(lexer, node, Lexer.IGNORE_WHITESPACE);
3581 }
3582
3583 }
3584
3585 if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
3586 {
3587 lexer.report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
3588 }
3589
3590
3591 if (lexer.configuration.xmlPi)
3592 {
3593 lexer.fixXmlDecl(document);
3594 }
3595
3596 return document;
3597 }
3598
3599 /***
3600 * errors in positioning of form start or end tags generally require human intervention to fix.
3601 */
3602 static void badForm(Lexer lexer)
3603 {
3604 lexer.badForm = 1;
3605 lexer.errors++;
3606 }
3607
3608 }