Maven Clover report

Clover coverage report - Maven Clover report

Coverage timestamp: Tue Aug 1 2006 15:09:51 CEST

FRAMES NO FRAMES

file stats:	LOC:	3,609		Methods:	29
	NCLOC:	2,570		Classes:	21

Source file

Conditionals

Statements

Methods

TOTAL

ParserImpl.java

68.1%

72.7%

93.1%

71.2%

1		/*
2		* Java HTML Tidy - JTidy
3		* HTML parser and pretty printer
4		*
5		* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6		* Institute of Technology, Institut National de Recherche en
7		* Informatique et en Automatique, Keio University). All Rights
8		* Reserved.
9		*
10		* Contributing Author(s):
11		*
12		* Dave Raggett <dsr@w3.org>
13		* Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14		* Gary L Peskin <garyp@firstech.com> (Java development)
15		* Sami Lempinen <sami@lempinen.net> (release management)
16		* Fabrizio Giustina <fgiust at users.sourceforge.net>
17		*
18		* The contributing author(s) would like to thank all those who
19		* helped with testing, bug fixes, and patience. This wouldn't
20		* have been possible without all of you.
21		*
22		* COPYRIGHT NOTICE:
23		*
24		* This software and documentation is provided "as is," and
25		* the copyright holders and contributing author(s) make no
26		* representations or warranties, express or implied, including
27		* but not limited to, warranties of merchantability or fitness
28		* for any particular purpose or that the use of the software or
29		* documentation will not infringe any third party patents,
30		* copyrights, trademarks or other rights.
31		*
32		* The copyright holders and contributing author(s) will not be
33		* liable for any direct, indirect, special or consequential damages
34		* arising out of any use of the software or documentation, even if
35		* advised of the possibility of such damage.
36		*
37		* Permission is hereby granted to use, copy, modify, and distribute
38		* this source code, or portions hereof, documentation and executables,
39		* for any purpose, without fee, subject to the following restrictions:
40		*
41		* 1. The origin of this source code must not be misrepresented.
42		* 2. Altered versions must be plainly marked as such and must
43		* not be misrepresented as being the original source.
44		* 3. This Copyright notice may not be removed or altered from any
45		* source or altered source distribution.
46		*
47		* The copyright holders and contributing author(s) specifically
48		* permit, without fee, and encourage the use of this source code
49		* as a component for supporting the Hypertext Markup Language in
50		* commercial products. If you use this source code in a product,
51		* acknowledgment is not required but would be appreciated.
52		*
53		*/
54		package org.w3c.tidy;
55
56		/**
57		* HTML Parser implementation.
58		* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
59		* @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
60		* @author Fabrizio Giustina
61		* @version $Revision: 806 $ ($Author: fgiust $)
62		*/
63		public final class ParserImpl
64		{
65
66		/**
67		* Shared parser for html; an instance of {@link ParseHTML}.
68		*/
69		public static final Parser HTML = new ParseHTML();
70
71		/**
72		* Shared parser for head; an instance of {@link ParseHead}.
73		*/
74		public static final Parser HEAD = new ParseHead();
75
76		/**
77		* Shared parser for title; an instance of {@link ParseTitle}.
78		*/
79		public static final Parser TITLE = new ParseTitle();
80
81		/**
82		* Shared parser for script; an instance of {@link ParseScript}.
83		*/
84		public static final Parser SCRIPT = new ParseScript();
85
86		/**
87		* Shared parser for body; an instance of {@link ParseBody}.
88		*/
89		public static final Parser BODY = new ParseBody();
90
91		/**
92		* Shared parser for frameset; an instance of {@link ParseFrameSet}.
93		*/
94		public static final Parser FRAMESET = new ParseFrameSet();
95
96		/**
97		* Shared parser for inline content; an instance of {@link ParseInline}.
98		*/
99		public static final Parser INLINE = new ParseInline();
100
101		/**
102		* Shared parser for lists; an instance of ParseList.
103		*/
104		public static final Parser LIST = new ParseList();
105
106		/**
107		* Shared parser for definition lists; an instance of ParseDefList.
108		*/
109		public static final Parser DEFLIST = new ParseDefList();
110
111		/**
112		* Shared parser for pre; an instance of ParsePre.
113		*/
114		public static final Parser PRE = new ParsePre();
115
116		/**
117		* Shared parser for block elements; an instance of ParseBlock.
118		*/
119		public static final Parser BLOCK = new ParseBlock();
120
121		/**
122		* Shared parser for table; an instance of ParseTableTag.
123		*/
124		public static final Parser TABLETAG = new ParseTableTag();
125
126		/**
127		* Shared parser for colgroup; an instance of ParseColGroup.
128		*/
129		public static final Parser COLGROUP = new ParseColGroup();
130
131		/**
132		* Shared parser for rowgroup; an instance of ParseRowGroup.
133		*/
134		public static final Parser ROWGROUP = new ParseRowGroup();
135
136		/**
137		* Shared parser for row; an instance of ParseRow.
138		*/
139		public static final Parser ROW = new ParseRow();
140
141		/**
142		* Shared parser for noframes; an instance of ParseNoFrames.
143		*/
144		public static final Parser NOFRAMES = new ParseNoFrames();
145
146		/**
147		* Shared parser for select; an instance of ParseSelect.
148		*/
149		public static final Parser SELECT = new ParseSelect();
150
151		/**
152		* Shared parser for text; an instance of ParseText.
153		*/
154		public static final Parser TEXT = new ParseText();
155
156		/**
157		* Shared parser for empty elements; an instance of ParseEmpty.
158		*/
159		public static final Parser EMPTY = new ParseEmpty();
160
161		/**
162		* Shared parser for optgroup; an instance of ParseOptGroup.
163		*/
164		public static final Parser OPTGROUP = new ParseOptGroup();
165
166		/**
167		* ParserImpl should not be instantiated: the constructor is private and has an empty body.
168		*/
169	0	private ParserImpl()
170		{
171		// intentionally empty
172		}
173
174		/** Clears lexer whitespace state according to the tag's content model, then runs the parser registered for the node's tag.
175		* @param lexer lexer whose waswhite/insertspace flags may be cleared; also passed on to the tag's parser
176		* @param node node to parse; node.tag is dereferenced without a null check
177		* @param mode lexer mode, forwarded unchanged to the tag's parser
178		*/
179	7242	protected static void parseTag(Lexer lexer, Node node, short mode)
180		{
181		// Fix by GLP 2000-12-21. Need to reset insertspace if this
182		// is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
183	7242	if ((node.tag.model & Dict.CM_EMPTY) != 0)
184		{
185	1903	lexer.waswhite = false; // as coded, CM_EMPTY tags clear waswhite rather than insertspace
186		}
187	5339	else if (!((node.tag.model & Dict.CM_INLINE) != 0))
188		{
189	2750	lexer.insertspace = false; // non-empty, non-inline tags clear insertspace
190		}
191
192	7242	if (node.tag.getParser() == null) // no parser registered for this tag: nothing more to do
193		{
194	0	return;
195		}
196
197	7242	if (node.type == Node.START_END_TAG) // start-end tag: only trimEmptyElement is applied, the tag's parser is not run
198		{
199	82	Node.trimEmptyElement(lexer, node);
200	82	return;
201		}
202
203	7160	node.tag.getParser().parse(lexer, node, mode);
204		}
205
206		/**
207		* Move node to the head, where element is used as starting point in hunt for head. Normally called during parsing. The node is always detached first; only start and start-end tags are moved (and then parsed), any other node type is just reported as DISCARDING_UNEXPECTED and stays detached.
208		* @param lexer lexer supplying the tag table and the report used for warnings
209		* @param element starting point for the upward walk to the html element; also used as the context element in warnings
210		* @param node node to move into head
211		*/
212	1	protected static void moveToHead(Lexer lexer, Node element, Node node)
213		{
214	1	Node head;
215	1	node.removeNode(); // make sure that node is isolated
216
217	1	TagTable tt = lexer.configuration.tt;
218
219	1	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
220		{
221	0	lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
222
223	0	while (element.tag != tt.tagHtml) // NOTE(review): no null check; without an html ancestor element.parent ends up null and is dereferenced
224		{
225	0	element = element.parent;
226		}
227
228	0	for (head = element.content; head != null; head = head.next) // scan the direct children of html for head
229		{
230	0	if (head.tag == tt.tagHead)
231		{
232	0	head.insertNodeAtEnd(node);
233	0	break;
234		}
235		}
236
237	0	if (node.tag.getParser() != null) // NOTE(review): runs even when no head child was found, in which case node is still detached
238		{
239	0	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
240		}
241		}
242		else
243		{
244	1	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
245		}
246		}
247
248		/**
249		* Detaches the given node from its current position and appends it to the end of the body element found from lexer.root.
250		* @param lexer Lexer providing the document root and the tag table used to find body
251		* @param node Node to insert
252		*/
253	1	static void moveNodeToBody(Lexer lexer, Node node)
254		{
255	1	node.removeNode();
256	1	Node body = lexer.root.findBody(lexer.configuration.tt);
257	1	body.insertNodeAtEnd(node); // NOTE(review): the findBody result is used without a null check - confirm it cannot return null here
258		}
259
260		/**
261		* Parser for HTML: finds or infers head and parses it with HEAD, then scans the following tokens for body, frameset and noframes content.
262		*/
263		public static class ParseHTML implements Parser
264		{
265
266		/** Parses the content of the html element. Returns once body has been parsed, or at end of input.
267		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
268		*/
269	221	public void parse(Lexer lexer, Node html, short mode)
270		{
271	221	Node node, head;
272	221	Node frameset = null; // first frameset element seen, if any
273	221	Node noframes = null; // noframes element that later non-frame content is routed into
274
275	221	lexer.configuration.xmlTags = false;
276	221	lexer.seenEndBody = false;
277	221	TagTable tt = lexer.configuration.tt;
278
279	221	while (true) // phase 1: find the head token, or infer one
280		{
281	225	node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
282
283	225	if (node == null) // end of input before any head: infer one
284		{
285	0	node = lexer.inferredTag("head");
286	0	break;
287		}
288
289	225	if (node.tag == tt.tagHead) // note: node.type is not checked here
290		{
291	195	break;
292		}
293
294	30	if (node.tag == html.tag && node.type == Node.END_TAG)
295		{
296	0	lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
297	0	continue;
298		}
299
300		// deal with comments etc.
301	30	if (Node.insertMisc(html, node))
302		{
303	4	continue;
304		}
305
306	26	lexer.ungetToken(); // anything else belongs after an implied head: push it back
307	26	node = lexer.inferredTag("head");
308	26	break;
309		}
310
311	221	head = node;
312	221	html.insertNodeAtEnd(head);
313	221	HEAD.parse(lexer, head, mode);
314
315	221	while (true) // phase 2: look for body / frameset / noframes
316		{
317	245	node = lexer.getToken(Lexer.IGNORE_WHITESPACE);
318
319	245	if (node == null)
320		{
321	10	if (frameset == null)
322		{
323		// implied body
324	2	node = lexer.inferredTag("body");
325	2	html.insertNodeAtEnd(node);
326	2	BODY.parse(lexer, node, mode);
327		}
328
329	10	return;
330		}
331
332		// robustly handle html tags
333	235	if (node.tag == html.tag)
334		{
335	7	if (node.type != Node.START_TAG && frameset == null)
336		{
337	1	lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
338		}
339	6	else if (node.type == Node.END_TAG)
340		{
341	6	lexer.seenEndHtml = true;
342		}
343
344	7	continue;
345		}
346
347		// deal with comments etc.
348	228	if (Node.insertMisc(html, node))
349		{
350	2	continue;
351		}
352
353		// if frameset document coerce <body> to <noframes>
354	226	if (node.tag == tt.tagBody)
355		{
356	194	if (node.type != Node.START_TAG)
357		{
358	1	lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
359	1	continue;
360		}
361
362	193	if (frameset != null)
363		{
364	0	lexer.ungetToken();
365
366	0	if (noframes == null)
367		{
368	0	noframes = lexer.inferredTag("noframes");
369	0	frameset.insertNodeAtEnd(noframes);
370	0	lexer.report.warning(lexer, html, noframes, Report.INSERTING_TAG);
371		}
372
373	0	parseTag(lexer, noframes, mode);
374	0	continue;
375		}
376
377	193	lexer.constrainVersion(~Dict.VERS_FRAMESET);
378	193	break; // to parse body
379		}
380
381		// flag an error if we see more than one frameset
382	32	if (node.tag == tt.tagFrameset)
383		{
384	8	if (node.type != Node.START_TAG)
385		{
386	0	lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
387	0	continue;
388		}
389
390	8	if (frameset != null)
391		{
392	0	lexer.report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
393		}
394		else
395		{
396	8	frameset = node;
397		}
398
399	8	html.insertNodeAtEnd(node); // a duplicate frameset is still inserted and parsed after the error is reported
400	8	parseTag(lexer, node, mode);
401
402		// see if it includes a noframes element so that we can merge subsequent noframes elements
403		// NOTE(review): this scans frameset.content, i.e. the first frameset, even when node was a duplicate frameset
404	8	for (node = frameset.content; node != null; node = node.next)
405		{
406	14	if (node.tag == tt.tagNoframes)
407		{
408	3	noframes = node;
409		}
410		}
411	8	continue;
412		}
413
414		// if not a frameset document coerce <noframes> to <body>
415	24	if (node.tag == tt.tagNoframes)
416		{
417	1	if (node.type != Node.START_TAG)
418		{
419	0	lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
420	0	continue;
421		}
422
423	1	if (frameset == null)
424		{
425	1	lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
426	1	node = lexer.inferredTag("body");
427	1	break;
428		}
429
430	0	if (noframes == null)
431		{
432	0	noframes = node;
433	0	frameset.insertNodeAtEnd(noframes);
434		}
435
436	0	parseTag(lexer, noframes, mode);
437	0	continue;
438		}
439
440	23	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
441		{
442	16	if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) // head-only element found after head: relocate it
443		{
444	0	moveToHead(lexer, html, node);
445	0	continue;
446		}
447
448		// #427675 - discard illegal frame element following a frameset - fix by Randy Waki 11 Oct 00
449	16	if (frameset != null && node.tag == tt.tagFrame)
450		{
451	3	lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
452	3	continue;
453		}
454		}
455
456	20	lexer.ungetToken(); // the token will be re-read by whichever parser is invoked below
457
458		// insert other content into noframes element
459	20	if (frameset != null)
460		{
461	3	if (noframes == null)
462		{
463	2	noframes = lexer.inferredTag("noframes");
464	2	frameset.insertNodeAtEnd(noframes);
465		}
466		else
467		{
468	1	lexer.report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
469		}
470
471	3	lexer.constrainVersion(Dict.VERS_FRAMESET);
472	3	parseTag(lexer, noframes, mode);
473	3	continue;
474		}
475
476	17	node = lexer.inferredTag("body"); // no frameset: the content implies a body
477	17	lexer.constrainVersion(~Dict.VERS_FRAMESET);
478	17	break;
479		}
480
481		// node must be body
482	211	html.insertNodeAtEnd(node);
483	211	parseTag(lexer, node, mode);
484	211	lexer.seenEndHtml = true;
485		}
486
487		}
488
489		/**
490		* Parser for HEAD: collects head-model elements, counts title and base elements, and appends an inferred title when none was seen.
491		*/
492		public static class ParseHead implements Parser
493		{
494
495		/** Parses the content of head. The mode argument is not used; tokens are always read with IGNORE_WHITESPACE.
496		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
497		*/
498	221	public void parse(Lexer lexer, Node head, short mode)
499		{
500	221	Node node;
501	221	int hasTitle = 0; // number of title start tags seen
502	221	int hasBase = 0; // number of base start tags seen
503	221	TagTable tt = lexer.configuration.tt;
504
505	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
506		{
507	536	if (node.tag == head.tag && node.type == Node.END_TAG)
508		{
509	186	head.closed = true;
510	186	break;
511		}
512
513	350	if (node.type == Node.TEXT_NODE) // text ends the head: warn, push it back and stop
514		{
515	4	lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
516	4	lexer.ungetToken();
517	4	break;
518		}
519
520		// deal with comments etc.
521	346	if (Node.insertMisc(head, node))
522		{
523	12	continue;
524		}
525
526	334	if (node.type == Node.DOCTYPE_TAG)
527		{
528	1	Node.insertDocType(lexer, head, node);
529	1	continue;
530		}
531
532		// discard unknown tags
533	333	if (node.tag == null)
534		{
535	0	lexer.report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
536	0	continue;
537		}
538
539	333	if (!TidyUtils.toBoolean(node.tag.model & Dict.CM_HEAD)) // not a head-model tag: head is implicitly closed here
540		{
541		// #545067 Implicit closing of head broken - warn only for XHTML input
542	30	if (lexer.isvoyager)
543		{
544	2	lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
545		}
546	30	lexer.ungetToken();
547	30	break;
548		}
549
550	303	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
551		{
552	300	if (node.tag == tt.tagTitle)
553		{
554	213	++hasTitle;
555
556	213	if (hasTitle > 1)
557		{
558	0	lexer.report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
559		}
560		}
561	87	else if (node.tag == tt.tagBase)
562		{
563	0	++hasBase;
564
565	0	if (hasBase > 1)
566		{
567	0	lexer.report.warning(lexer, head, node, Report.TOO_MANY_ELEMENTS);
568		}
569		}
570	87	else if (node.tag == tt.tagNoscript)
571		{
572	0	lexer.report.warning(lexer, head, node, Report.TAG_NOT_ALLOWED_IN);
573		}
574
575	300	head.insertNodeAtEnd(node); // duplicates and noscript are warned about above but still inserted
576	300	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
577	300	continue;
578		}
579
580		// discard what is left: tokens with a head-model tag that are not start tags (text nodes already ended the loop above)
581	3	lexer.report.warning(lexer, head, node, Report.DISCARDING_UNEXPECTED);
582		}
583
584	221	if (hasTitle == 0)
585		{
586	8	if (!lexer.configuration.bodyOnly) // the warning is suppressed for bodyOnly; the title is inferred either way
587		{
588	8	lexer.report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
589		}
590	8	head.insertNodeAtEnd(lexer.inferredTag("title"));
591		}
592		}
593
594		}
595
596		/**
597		* Parser for TITLE: collects text and miscellaneous nodes until the title end tag; any other known tag is pushed back and ends the title.
598		*/
599		public static class ParseTitle implements Parser
600		{
601
602		/** Parses the content of title. The mode argument is not used; tokens are always read with MIXED_CONTENT.
603		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
604		*/
605	212	public void parse(Lexer lexer, Node title, short mode)
606		{
607	212	Node node;
608
609	?	while ((node = lexer.getToken(Lexer.MIXED_CONTENT)) != null)
610		{
611		// [438658] : Missing / in title endtag makes 2 titles
612	425	if (node.tag == title.tag && node.type == Node.START_TAG)
613		{
614	1	lexer.report.warning(lexer, title, node, Report.COERCE_TO_ENDTAG);
615	1	node.type = Node.END_TAG; // retyped only; the loop continues without closing the title here
616	1	continue;
617		}
618	424	else if (node.tag == title.tag && node.type == Node.END_TAG)
619		{
620	209	title.closed = true;
621	209	Node.trimSpaces(lexer, title);
622	209	return;
623		}
624
625	215	if (node.type == Node.TEXT_NODE)
626		{
627		// only called for 1st child
628	211	if (title.content == null)
629		{
630	210	Node.trimInitialSpace(lexer, title, node);
631		}
632
633	211	if (node.start >= node.end) // empty text node: drop it
634		{
635	0	continue;
636		}
637
638	211	title.insertNodeAtEnd(node);
639	211	continue;
640		}
641
642		// deal with comments etc.
643	4	if (Node.insertMisc(title, node))
644		{
645	1	continue;
646		}
647
648		// discard unknown tags
649	3	if (node.tag == null)
650		{
651	0	lexer.report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
652	0	continue;
653		}
654
655		// pushback unexpected tokens
656	3	lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
657	3	lexer.ungetToken();
658	3	Node.trimSpaces(lexer, title);
659	3	return;
660		}
661
662	0	lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR); // reached only at end of input, so node is null here
663		}
664
665		}
666
667		/**
668		* Parser for SCRIPT: reads the element's content through lexer.getCDATA and attaches the result as a child node.
669		*/
670		public static class ParseScript implements Parser
671		{
672
673		/** Appends the node returned by lexer.getCDATA(script) to script, if one is returned. The mode argument is not used.
674		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
675		*/
676	34	public void parse(Lexer lexer, Node script, short mode)
677		{
678		// This isn't quite right for CDATA content as it recognises tags within the content and parses them
679		// accordingly. This will unfortunately screw up scripts which include < + letter, < + !, < + ? or < + / +
680		// letter
681
682	34	Node node = lexer.getCDATA(script);
683
684	34	if (node != null) // getCDATA may return null; nothing is inserted then
685		{
686	31	script.insertNodeAtEnd(node);
687		}
688		}
689
690		}
691
692		/**
693		* Parser for BODY: appends text, inline and block content to body, and infers ul, dl, table or form wrappers for misplaced list, definition-list, table and input content.
694		*/
695		public static class ParseBody implements Parser
696		{
697
698		/** Parses the content of body. The mode argument is overwritten with IGNORE_WHITESPACE on entry.
699		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
700		*/
701	217	public void parse(Lexer lexer, Node body, short mode)
702		{
703	217	Node node;
704	217	boolean checkstack, iswhitenode;
705
706	217	mode = Lexer.IGNORE_WHITESPACE; // the caller-supplied mode is discarded
707	217	checkstack = true; // when true, the next text or inline start tag triggers lexer.inlineDup
708	217	TagTable tt = lexer.configuration.tt;
709
710	217	Clean.bumpObject(lexer, body.parent);
711
712	?	while ((node = lexer.getToken(mode)) != null)
713		{
714
715		// #538536 Extra endtags not detected
716	1033	if (node.tag == tt.tagHtml)
717		{
718	192	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG \|\| lexer.seenEndHtml)
719		{
720	2	lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
721		}
722		else
723		{
724	190	lexer.seenEndHtml = true;
725		}
726
727	192	continue;
728		}
729
730	841	if (lexer.seenEndBody
731		&& (node.type == Node.START_TAG \|\| node.type == Node.END_TAG \|\| node.type == Node.START_END_TAG))
732		{
733	8	lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY); // warning only; the token is still processed below
734		}
735
736	841	if (node.tag == body.tag && node.type == Node.END_TAG)
737		{
738	193	body.closed = true;
739	193	Node.trimSpaces(lexer, body);
740	193	lexer.seenEndBody = true;
741	193	mode = Lexer.IGNORE_WHITESPACE;
742
743	193	if (body.parent.tag == tt.tagNoframes) // a body inside noframes stops at its end tag; otherwise keep reading tokens
744		{
745	3	break;
746		}
747
748	190	continue;
749		}
750
751	648	if (node.tag == tt.tagNoframes)
752		{
753	0	if (node.type == Node.START_TAG)
754		{
755	0	body.insertNodeAtEnd(node);
756	0	BLOCK.parse(lexer, node, mode);
757	0	continue;
758		}
759
760	0	if (node.type == Node.END_TAG && body.parent.tag == tt.tagNoframes)
761		{
762	0	Node.trimSpaces(lexer, body);
763	0	lexer.ungetToken();
764	0	break;
765		}
766		}
767
768	648	if ((node.tag == tt.tagFrame \|\| node.tag == tt.tagFrameset) && body.parent.tag == tt.tagNoframes)
769		{
770	0	Node.trimSpaces(lexer, body);
771	0	lexer.ungetToken();
772	0	break;
773		}
774
775	648	iswhitenode = false;
776
777		// NOTE(review): textarray[node.start] is read even when node.end == node.start (empty text node)
777	648	if (node.type == Node.TEXT_NODE
778		&& node.end <= node.start + 1
779		&& node.textarray[node.start] == (byte) ' ')
780		{
781	57	iswhitenode = true;
782		}
783
784		// deal with comments etc.
785	648	if (Node.insertMisc(body, node))
786		{
787	13	continue;
788		}
789
790		// #538536 Extra endtags not detected
791		// if (lexer.seenEndBody && !iswhitenode)
792		// {
793		// lexer.seenEndBody = true;
794		// lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
795		// }
796
797		// mixed content model permits text
798	635	if (node.type == Node.TEXT_NODE)
799		{
800	112	if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE)
801		{
802	0	continue;
803		}
804
805	112	if (lexer.configuration.encloseBodyText && !iswhitenode) // wrap bare text in an inferred p; the text token is re-read by the p parser
806		{
807	0	Node para;
808
809	0	lexer.ungetToken();
810	0	para = lexer.inferredTag("p");
811	0	body.insertNodeAtEnd(para);
812	0	parseTag(lexer, para, mode);
813	0	mode = Lexer.MIXED_CONTENT;
814	0	continue;
815		}
816
817		// HTML2 and HTML4 strict doesn't allow text here
818	112	lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT \| Dict.VERS_HTML20));
819
820	112	if (checkstack)
821		{
822	48	checkstack = false;
823
824	48	if (lexer.inlineDup(node) > 0)
825		{
826	2	continue;
827		}
828		}
829
830	110	body.insertNodeAtEnd(node);
831	110	mode = Lexer.MIXED_CONTENT;
832	110	continue;
833		}
834
835	523	if (node.type == Node.DOCTYPE_TAG)
836		{
837	1	Node.insertDocType(lexer, body, node);
838	1	continue;
839		}
840		// discard unknown and PARAM tags
841	522	if (node.tag == null \|\| node.tag == tt.tagParam)
842		{
843	24	lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
844	24	continue;
845		}
846
847		// Netscape allows LI and DD directly in BODY We infer UL or DL respectively and use this boolean to
848		// exclude block-level elements so as to match Netscape's observed behaviour.
849
850	498	lexer.excludeBlocks = false;
851
852	498	if ((!((node.tag.model & Dict.CM_BLOCK) != 0) && !((node.tag.model & Dict.CM_INLINE) != 0))
853		\|\| node.tag == tt.tagInput)
854		{
855		// avoid this error message being issued twice
856	18	if (!((node.tag.model & Dict.CM_HEAD) != 0))
857		{
858	17	lexer.report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
859		}
860
861	18	if ((node.tag.model & Dict.CM_HTML) != 0)
862		{
863		// copy body attributes if current body was inferred
864	10	if (node.tag == tt.tagBody && body.implicit && body.attributes == null)
865		{
866	6	body.attributes = node.attributes;
867	6	node.attributes = null;
868		}
869
870	10	continue;
871		}
872
873	8	if ((node.tag.model & Dict.CM_HEAD) != 0)
874		{
875	1	moveToHead(lexer, body, node);
876	1	continue;
877		}
878
879	7	if ((node.tag.model & Dict.CM_LIST) != 0) // list item directly in body: push it back and infer an enclosing ul
880		{
881	2	lexer.ungetToken();
882	2	node = lexer.inferredTag("ul");
883	2	node.addClass("noindent");
884	2	lexer.excludeBlocks = true;
885		}
886	5	else if ((node.tag.model & Dict.CM_DEFLIST) != 0) // definition-list content: infer an enclosing dl
887		{
888	1	lexer.ungetToken();
889	1	node = lexer.inferredTag("dl");
890	1	lexer.excludeBlocks = true;
891		}
892	4	else if ((node.tag.model & (Dict.CM_TABLE \| Dict.CM_ROWGRP \| Dict.CM_ROW)) != 0) // table content: infer an enclosing table
893		{
894	0	lexer.ungetToken();
895	0	node = lexer.inferredTag("table");
896	0	lexer.excludeBlocks = true;
897		}
898	4	else if (node.tag == tt.tagInput) // input directly in body: infer an enclosing form
899		{
900	1	lexer.ungetToken();
901	1	node = lexer.inferredTag("form");
902	1	lexer.excludeBlocks = true;
903		}
904		else
905		{
906	3	if (!((node.tag.model & (Dict.CM_ROW \| Dict.CM_FIELD)) != 0)) // anything else: push back and leave body parsing
907		{
908	2	lexer.ungetToken();
909	2	return;
910		}
911
912		// ignore </td></th> <option> etc.
913	1	continue;
914		}
915		}
916
917	484	if (node.type == Node.END_TAG)
918		{
919	19	if (node.tag == tt.tagBr) // </br> is treated as <br>
920		{
921	0	node.type = Node.START_TAG;
922		}
923	19	else if (node.tag == tt.tagP) // </p> is coerced to br and inserted; a second inferred br is inserted by the start-tag branch below
924		{
925	0	Node.coerceNode(lexer, node, tt.tagBr);
926	0	body.insertNodeAtEnd(node);
927	0	node = lexer.inferredTag("br");
928		}
929	19	else if ((node.tag.model & Dict.CM_INLINE) != 0)
930		{
931	16	lexer.popInline(node);
932		}
933		}
934
935	484	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
936		{
937	465	if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
938		{
939		// HTML4 strict doesn't allow inline content here
940		// but HTML2 does allow img elements as children of body
941	92	if (node.tag == tt.tagImg)
942		{
943	4	lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
944		}
945		else
946		{
947	88	lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT \| Dict.VERS_HTML20));
948		}
949
950	92	if (checkstack && !node.implicit)
951		{
952	46	checkstack = false;
953
954	46	if (lexer.inlineDup(node) > 0)
955		{
956	2	continue;
957		}
958		}
959
960	90	mode = Lexer.MIXED_CONTENT;
961		}
962		else
963		{
964	373	checkstack = true;
965	373	mode = Lexer.IGNORE_WHITESPACE;
966		}
967
968	463	if (node.implicit)
969		{
970	8	lexer.report.warning(lexer, body, node, Report.INSERTING_TAG);
971		}
972
973	463	body.insertNodeAtEnd(node);
974	463	parseTag(lexer, node, mode);
975	463	continue;
976		}
977
978		// discard unexpected tags
979	19	lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
980		}
981		}
982
983		}
984
985		/**
986		* Parser for FRAMESET: appends CM_FRAMES elements to the frameset, relocates head-model elements to head, and discards everything else.
987		*/
988		public static class ParseFrameSet implements Parser
989		{
990
991		/** Parses the content of frameset. The mode argument is not used; tokens are always read with IGNORE_WHITESPACE.
992		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
993		*/
994	9	public void parse(Lexer lexer, Node frameset, short mode)
995		{
996	9	Node node;
997	9	TagTable tt = lexer.configuration.tt;
998
999	9	lexer.badAccess \|= Report.USING_FRAMES; // set the USING_FRAMES bit in badAccess
1000
1001	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
1002		{
1003	25	if (node.tag == frameset.tag && node.type == Node.END_TAG)
1004		{
1005	8	frameset.closed = true;
1006	8	Node.trimSpaces(lexer, frameset);
1007	8	return;
1008		}
1009
1010		// deal with comments etc.
1011	17	if (Node.insertMisc(frameset, node))
1012		{
1013	0	continue;
1014		}
1015
1016	17	if (node.tag == null) // unknown tag: discard
1017		{
1018	0	lexer.report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
1019	0	continue;
1020		}
1021
1022	17	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
1023		{
1024	14	if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0) // node.tag != null is always true here (null was handled above)
1025		{
1026	0	moveToHead(lexer, frameset, node);
1027	0	continue;
1028		}
1029		}
1030
1031	17	if (node.tag == tt.tagBody) // body inside frameset: push it back and substitute an inferred noframes
1032		{
1033	0	lexer.ungetToken();
1034	0	node = lexer.inferredTag("noframes");
1035	0	lexer.report.warning(lexer, frameset, node, Report.INSERTING_TAG);
1036		}
1037
1038	17	if (node.type == Node.START_TAG && (node.tag.model & Dict.CM_FRAMES) != 0)
1039		{
1040	14	frameset.insertNodeAtEnd(node);
1041	14	lexer.excludeBlocks = false;
1042	14	parseTag(lexer, node, Lexer.MIXED_CONTENT);
1043	14	continue;
1044		}
1045	3	else if (node.type == Node.START_END_TAG && (node.tag.model & Dict.CM_FRAMES) != 0) // start-end form: inserted without being parsed
1046		{
1047	0	frameset.insertNodeAtEnd(node);
1048	0	continue;
1049		}
1050
1051		// discard unexpected tags
1052	3	lexer.report.warning(lexer, frameset, node, Report.DISCARDING_UNEXPECTED);
1053		}
1054
1055	1	lexer.report.warning(lexer, frameset, node, Report.MISSING_ENDTAG_FOR); // reached only at end of input, so node is null here
1056		}
1057
1058		}
1059
1060		/**
1061		* Parser for INLINE.
1062		*/
1063		public static class ParseInline implements Parser
1064		{
1065
1066		/**
1067		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1068		*/
1069	2942	public void parse(Lexer lexer, Node element, short mode)
1070		{
1071	2942	Node node, parent;
1072	2942	TagTable tt = lexer.configuration.tt;
1073
1074	2942	if (TidyUtils.toBoolean(element.tag.model & Dict.CM_EMPTY))
1075		{
1076	0	return;
1077		}
1078
1079		// ParseInline is used for some block level elements like H1 to H6 For such elements we need to insert
1080		// inline emphasis tags currently on the inline stack. For Inline elements, we normally push them onto the
1081		// inline stack provided they aren't implicit or OBJECT/APPLET. This test is carried out in PushInline and
1082		// PopInline, see istack.c We don't push SPAN to replicate current browser behavior
1083
1084	2942	if (TidyUtils.toBoolean(element.tag.model & Dict.CM_BLOCK) \|\| (element.tag == tt.tagDt))
1085		{
1086	403	lexer.inlineDup(null);
1087		}
1088	2539	else if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE))
1089		{
1090		// && element.tag != tt.tagSpan #540571 Inconsistent behaviour with span inline element
1091	2539	lexer.pushInline(element);
1092		}
1093
1094	2942	if (element.tag == tt.tagNobr)
1095		{
1096	1	lexer.badLayout \|= Report.USING_NOBR;
1097		}
1098	2941	else if (element.tag == tt.tagFont)
1099		{
1100	682	lexer.badLayout \|= Report.USING_FONT;
1101		}
1102
1103		// Inline elements may or may not be within a preformatted element
1104	2942	if (mode != Lexer.PREFORMATTED)
1105		{
1106	2937	mode = Lexer.MIXED_CONTENT;
1107		}
1108
1109	?	while ((node = lexer.getToken(mode)) != null)
1110		{
1111		// end tag for current element
1112	11915	if (node.tag == element.tag && node.type == Node.END_TAG)
1113		{
1114	2793	if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE))
1115		{
1116	2423	lexer.popInline(node);
1117		}
1118
1119	2793	if (!TidyUtils.toBoolean(mode & Lexer.PREFORMATTED))
1120		{
1121	2788	Node.trimSpaces(lexer, element);
1122		}
1123
1124		// if a font element wraps an anchor and nothing else then move the font element inside the anchor
1125		// since otherwise it won't alter the anchor text color
1126
1127	2793	if (element.tag == tt.tagFont && element.content != null && element.content == element.last)
1128		{
1129	324	Node child = element.content;
1130
1131	324	if (child.tag == tt.tagA)
1132		{
1133	55	child.parent = element.parent;
1134	55	child.next = element.next;
1135	55	child.prev = element.prev;
1136
1137	55	if (child.prev != null)
1138		{
1139	46	child.prev.next = child;
1140		}
1141		else
1142		{
1143	9	child.parent.content = child;
1144		}
1145
1146	55	if (child.next != null)
1147		{
1148	0	child.next.prev = child;
1149		}
1150		else
1151		{
1152	55	child.parent.last = child;
1153		}
1154
1155	55	element.next = null;
1156	55	element.prev = null;
1157	55	element.parent = child;
1158	55	element.content = child.content;
1159	55	element.last = child.last;
1160	55	child.content = element;
1161	55	child.last = element;
1162	55	for (child = element.content; child != null; child = child.next)
1163		{
1164	49	child.parent = element;
1165		}
1166		}
1167		}
1168	2793	element.closed = true;
1169	2793	Node.trimSpaces(lexer, element);
1170	2793	Node.trimEmptyElement(lexer, element);
1171	2793	return;
1172		}
1173
1174		// <u> ... <u> map 2nd <u> to </u> if 1st is explicit
1175		// otherwise emphasis nesting is probably unintentional
1176		// big and small have cumulative effect to leave them alone
1177	9122	if (node.type == Node.START_TAG
1178		&& node.tag == element.tag
1179		&& lexer.isPushed(node)
1180		&& !node.implicit
1181		&& !element.implicit
1182		&& node.tag != null
1183		&& ((node.tag.model & Dict.CM_INLINE) != 0)
1184		&& node.tag != tt.tagA
1185		&& node.tag != tt.tagFont
1186		&& node.tag != tt.tagBig
1187		&& node.tag != tt.tagSmall
1188		&& node.tag != tt.tagQ)
1189		{
1190	25	if (element.content != null && node.attributes == null)
1191		{
1192	3	lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1193	3	node.type = Node.END_TAG;
1194	3	lexer.ungetToken();
1195	3	continue;
1196		}
1197
1198	22	lexer.report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
1199		}
1200	9097	else if (lexer.isPushed(node) && node.type == Node.START_TAG && node.tag == tt.tagQ)
1201		{
1202	1	lexer.report.warning(lexer, element, node, Report.NESTED_QUOTATION);
1203		}
1204
1205	9119	if (node.type == Node.TEXT_NODE)
1206		{
1207		// only called for 1st child
1208	5041	if (element.content == null && !TidyUtils.toBoolean(mode & Lexer.PREFORMATTED))
1209		{
1210	2007	Node.trimSpaces(lexer, element);
1211		}
1212
1213	5041	if (node.start >= node.end)
1214		{
1215	1	continue;
1216		}
1217
1218	5040	element.insertNodeAtEnd(node);
1219	5040	continue;
1220		}
1221
1222		// mixed content model so allow text
1223	4078	if (Node.insertMisc(element, node))
1224		{
1225	330	continue;
1226		}
1227
1228		// deal with HTML tags
1229	3748	if (node.tag == tt.tagHtml)
1230		{
1231	1	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
1232		{
1233	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1234	0	continue;
1235		}
1236
1237		// otherwise infer end of inline element
1238	1	lexer.ungetToken();
1239	1	if (!((mode & Lexer.PREFORMATTED) != 0))
1240		{
1241	1	Node.trimSpaces(lexer, element);
1242		}
1243	1	Node.trimEmptyElement(lexer, element);
1244	1	return;
1245		}
1246
1247		// within <dt> or <pre> map <p> to <br>
1248	3747	if (node.tag == tt.tagP
1249		&& node.type == Node.START_TAG
1250		&& ((mode & Lexer.PREFORMATTED) != 0 \|\| element.tag == tt.tagDt \|\| element.isDescendantOf(tt.tagDt)))
1251		{
1252	0	node.tag = tt.tagBr;
1253	0	node.element = "br";
1254	0	Node.trimSpaces(lexer, element);
1255	0	element.insertNodeAtEnd(node);
1256	0	continue;
1257		}
1258
1259		// ignore unknown and PARAM tags
1260	3747	if (node.tag == null \|\| node.tag == tt.tagParam)
1261		{
1262	25	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1263	25	continue;
1264		}
1265
1266	3722	if (node.tag == tt.tagBr && node.type == Node.END_TAG)
1267		{
1268	0	node.type = Node.START_TAG;
1269		}
1270
1271	3722	if (node.type == Node.END_TAG)
1272		{
1273		// coerce </br> to <br>
1274	82	if (node.tag == tt.tagBr)
1275		{
1276	0	node.type = Node.START_TAG;
1277		}
1278	82	else if (node.tag == tt.tagP)
1279		{
1280		// coerce unmatched </p> to <br><br>
1281	32	if (!element.isDescendantOf(tt.tagP))
1282		{
1283	0	Node.coerceNode(lexer, node, tt.tagBr);
1284	0	Node.trimSpaces(lexer, element);
1285	0	element.insertNodeAtEnd(node);
1286	0	node = lexer.inferredTag("br");
1287	0	continue;
1288		}
1289		}
1290	50	else if ((node.tag.model & Dict.CM_INLINE) != 0
1291		&& node.tag != tt.tagA
1292		&& !((node.tag.model & Dict.CM_OBJECT) != 0)
1293		&& (element.tag.model & Dict.CM_INLINE) != 0)
1294		{
1295		// allow any inline end tag to end current element
1296	4	lexer.popInline(element);
1297
1298	4	if (element.tag != tt.tagA)
1299		{
1300	4	if (node.tag == tt.tagA && node.tag != element.tag)
1301		{
1302	0	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1303	0	lexer.ungetToken();
1304		}
1305		else
1306		{
1307	4	lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1308		}
1309
1310	4	if (!((mode & Lexer.PREFORMATTED) != 0))
1311		{
1312	4	Node.trimSpaces(lexer, element);
1313		}
1314	4	Node.trimEmptyElement(lexer, element);
1315	4	return;
1316		}
1317
1318		// if parent is <a> then discard unexpected inline end tag
1319	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1320	0	continue;
1321		} // special case </tr> etc. for stuff moved in front of table
1322	46	else if (lexer.exiled && node.tag.model != 0 && (node.tag.model & Dict.CM_TABLE) != 0)
1323		{
1324	0	lexer.ungetToken();
1325	0	Node.trimSpaces(lexer, element);
1326	0	Node.trimEmptyElement(lexer, element);
1327	0	return;
1328		}
1329		}
1330
1331		// allow any header tag to end current header
1332	3718	if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
1333		{
1334	2	if (node.tag == element.tag)
1335		{
1336	0	lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
1337		}
1338		else
1339		{
1340	2	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1341	2	lexer.ungetToken();
1342		}
1343	2	if (!((mode & Lexer.PREFORMATTED) != 0))
1344		{
1345	2	Node.trimSpaces(lexer, element);
1346		}
1347	2	Node.trimEmptyElement(lexer, element);
1348	2	return;
1349		}
1350
1351		// an <A> tag ends any open <A> element, but <A href=...> is mapped to </A><A href=...>
1352
1353		// #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00
1354		// if (node.tag == tt.tagA && !node.implicit && lexer.isPushed(node))
1355	3716	if (node.tag == tt.tagA
1356		&& !node.implicit
1357		&& (element.tag == tt.tagA \|\| element.isDescendantOf(tt.tagA)))
1358		{
1359		// coerce <a> to </a> unless it has some attributes
1360		// #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00
1361		// other fixes by Dave Raggett
1362		// if (node.attributes == null)
1363	5	if (node.type != Node.END_TAG && node.attributes == null)
1364		{
1365	1	node.type = Node.END_TAG;
1366	1	lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
1367		// lexer.popInline(node);
1368	1	lexer.ungetToken();
1369	1	continue;
1370		}
1371
1372	4	lexer.ungetToken();
1373	4	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1374		// lexer.popInline(element);
1375	4	if (!((mode & Lexer.PREFORMATTED) != 0))
1376		{
1377	4	Node.trimSpaces(lexer, element);
1378		}
1379	4	Node.trimEmptyElement(lexer, element);
1380	4	return;
1381		}
1382
1383	3711	if ((element.tag.model & Dict.CM_HEADING) != 0)
1384		{
1385	165	if (node.tag == tt.tagCenter \|\| node.tag == tt.tagDiv)
1386		{
1387	3	if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
1388		{
1389	1	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1390	1	continue;
1391		}
1392
1393	2	lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1394
1395		// insert center as parent if heading is empty
1396	2	if (element.content == null)
1397		{
1398	0	Node.insertNodeAsParent(element, node);
1399	0	continue;
1400		}
1401
1402		// split heading and make center parent of 2nd part
1403	2	element.insertNodeAfterElement(node);
1404
1405	2	if (!((mode & Lexer.PREFORMATTED) != 0))
1406		{
1407	2	Node.trimSpaces(lexer, element);
1408		}
1409
1410	2	element = lexer.cloneNode(element);
1411	2	element.start = lexer.lexsize;
1412	2	element.end = lexer.lexsize;
1413	2	node.insertNodeAtEnd(element);
1414	2	continue;
1415		}
1416
1417	162	if (node.tag == tt.tagHr)
1418		{
1419	0	if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
1420		{
1421	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1422	0	continue;
1423		}
1424
1425	0	lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1426
1427		// insert hr before heading if heading is empty
1428	0	if (element.content == null)
1429		{
1430	0	Node.insertNodeBeforeElement(element, node);
1431	0	continue;
1432		}
1433
1434		// split heading and insert hr before 2nd part
1435	0	element.insertNodeAfterElement(node);
1436
1437	0	if (!((mode & Lexer.PREFORMATTED) != 0))
1438		{
1439	0	Node.trimSpaces(lexer, element);
1440		}
1441
1442	0	element = lexer.cloneNode(element);
1443	0	element.start = lexer.lexsize;
1444	0	element.end = lexer.lexsize;
1445	0	node.insertNodeAfterElement(element);
1446	0	continue;
1447		}
1448		}
1449
1450	3708	if (element.tag == tt.tagDt)
1451		{
1452	132	if (node.tag == tt.tagHr)
1453		{
1454	2	Node dd;
1455
1456	2	if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
1457		{
1458	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1459	0	continue;
1460		}
1461
1462	2	lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
1463	2	dd = lexer.inferredTag("dd");
1464
1465		// insert hr within dd before dt if dt is empty
1466	2	if (element.content == null)
1467		{
1468	0	Node.insertNodeBeforeElement(element, dd);
1469	0	dd.insertNodeAtEnd(node);
1470	0	continue;
1471		}
1472
1473		// split dt and insert hr within dd before 2nd part
1474	2	element.insertNodeAfterElement(dd);
1475	2	dd.insertNodeAtEnd(node);
1476
1477	2	if (!((mode & Lexer.PREFORMATTED) != 0))
1478		{
1479	2	Node.trimSpaces(lexer, element);
1480		}
1481
1482	2	element = lexer.cloneNode(element);
1483	2	element.start = lexer.lexsize;
1484	2	element.end = lexer.lexsize;
1485	2	dd.insertNodeAfterElement(element);
1486	2	continue;
1487		}
1488		}
1489
1490		// if this is the end tag for an ancestor element then infer end tag for this element
1491
1492	3706	if (node.type == Node.END_TAG)
1493		{
1494	77	for (parent = element.parent; parent != null; parent = parent.parent)
1495		{
1496	99	if (node.tag == parent.tag)
1497		{
1498	70	if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
1499		{
1500	4	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1501		}
1502
1503	70	if (element.tag == tt.tagA)
1504		{
1505	5	lexer.popInline(element);
1506		}
1507
1508	70	lexer.ungetToken();
1509
1510	70	if (!((mode & Lexer.PREFORMATTED) != 0))
1511		{
1512	70	Node.trimSpaces(lexer, element);
1513		}
1514
1515	70	Node.trimEmptyElement(lexer, element);
1516	70	return;
1517		}
1518		}
1519		}
1520
1521		// block level tags end this element
1522	3636	if (!((node.tag.model & Dict.CM_INLINE) != 0))
1523		{
1524	64	if (node.type != Node.START_TAG)
1525		{
1526	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1527	0	continue;
1528		}
1529
1530	64	if (!((element.tag.model & Dict.CM_OPT) != 0))
1531		{
1532	43	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
1533		}
1534
1535	64	if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
1536		{
1537	0	moveToHead(lexer, element, node);
1538	0	continue;
1539		}
1540
1541		// prevent anchors from propagating into block tags except for headings h1 to h6
1542
1543	64	if (element.tag == tt.tagA)
1544		{
1545	3	if (node.tag != null && !((node.tag.model & Dict.CM_HEADING) != 0))
1546		{
1547	1	lexer.popInline(element);
1548		}
1549	2	else if (!(element.content != null))
1550		{
1551	2	Node.discardElement(element);
1552	2	lexer.ungetToken();
1553	2	return;
1554		}
1555		}
1556
1557	62	lexer.ungetToken();
1558
1559	62	if (!((mode & Lexer.PREFORMATTED) != 0))
1560		{
1561	62	Node.trimSpaces(lexer, element);
1562		}
1563
1564	62	Node.trimEmptyElement(lexer, element);
1565	62	return;
1566		}
1567
1568		// parse inline element
1569	3572	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
1570		{
1571	3565	if (node.implicit)
1572		{
1573	55	lexer.report.warning(lexer, element, node, Report.INSERTING_TAG);
1574		}
1575
1576		// trim white space before <br>
1577	3565	if (node.tag == tt.tagBr)
1578		{
1579	1361	Node.trimSpaces(lexer, element);
1580		}
1581
1582	3565	element.insertNodeAtEnd(node);
1583	3565	parseTag(lexer, node, mode);
1584	3565	continue;
1585		}
1586
1587		// discard unexpected tags
1588	7	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
1589	7	continue;
1590		}
1591
1592	4	if (!((element.tag.model & Dict.CM_OPT) != 0))
1593		{
1594	1	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
1595		}
1596
1597	4	Node.trimEmptyElement(lexer, element);
1598		}
1599		}
1600
1601		/**
1602		* Parser for LIST.
1603		*/
1604		public static class ParseList implements Parser
1605		{
1606
1607	71	public void parse(Lexer lexer, Node list, short mode)
1608		{
1609	71	Node node;
1610	71	Node parent;
1611	71	TagTable tt = lexer.configuration.tt;
1612
1613	71	if ((list.tag.model & Dict.CM_EMPTY) != 0)
1614		{
1615	0	return;
1616		}
1617
1618	71	lexer.insert = -1; // defer implicit inline start tags
1619
1620	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
1621		{
1622	245	if (node.tag == list.tag && node.type == Node.END_TAG)
1623		{
1624	68	if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1625		{
1626	0	Node.coerceNode(lexer, list, tt.tagUl);
1627		}
1628
1629	68	list.closed = true;
1630	68	Node.trimEmptyElement(lexer, list);
1631	68	return;
1632		}
1633
1634		// deal with comments etc.
1635	177	if (Node.insertMisc(list, node))
1636		{
1637	0	continue;
1638		}
1639
1640	177	if (node.type != Node.TEXT_NODE && node.tag == null)
1641		{
1642	0	lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1643	0	continue;
1644		}
1645
1646		// if this is the end tag for an ancestor element then infer end tag for this element
1647
1648	177	if (node.type == Node.END_TAG)
1649		{
1650	0	if (node.tag == tt.tagForm)
1651		{
1652	0	badForm(lexer);
1653	0	lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1654	0	continue;
1655		}
1656
1657	0	if (node.tag != null && (node.tag.model & Dict.CM_INLINE) != 0)
1658		{
1659	0	lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1660	0	lexer.popInline(node);
1661	0	continue;
1662		}
1663
1664	0	for (parent = list.parent; parent != null; parent = parent.parent)
1665		{
1666	0	if (node.tag == parent.tag)
1667		{
1668	0	lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1669	0	lexer.ungetToken();
1670
1671	0	if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1672		{
1673	0	Node.coerceNode(lexer, list, tt.tagUl);
1674		}
1675
1676	0	Node.trimEmptyElement(lexer, list);
1677	0	return;
1678		}
1679		}
1680
1681	0	lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1682	0	continue;
1683		}
1684
1685	177	if (node.tag != tt.tagLi)
1686		{
1687	6	lexer.ungetToken();
1688
1689	6	if (node.tag != null && (node.tag.model & Dict.CM_BLOCK) != 0 && lexer.excludeBlocks)
1690		{
1691	2	lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1692	2	Node.trimEmptyElement(lexer, list);
1693	2	return;
1694		}
1695
1696	4	node = lexer.inferredTag("li");
1697	4	node.addAttribute("style", "list-style: none");
1698	4	lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1699		}
1700
1701		// node should be <LI>
1702	175	list.insertNodeAtEnd(node);
1703	175	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
1704		}
1705
1706	1	if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
1707		{
1708	0	Node.coerceNode(lexer, list, tt.tagUl);
1709		}
1710
1711	1	lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1712	1	Node.trimEmptyElement(lexer, list);
1713		}
1714
1715		}
1716
1717		/**
1718		* Parser for empty elements.
1719		*/
1720		public static class ParseEmpty implements Parser
1721		{
1722
1723		/**
1724		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1725		*/
1726	1823	public void parse(Lexer lexer, Node element, short mode)
1727		{
1728	1823	if (lexer.isvoyager)
1729		{
1730	2	Node node = lexer.getToken(mode);
1731	2	if (node != null && !(node.type == Node.END_TAG && node.tag == element.tag))
1732		{
1733	0	lexer.report.warning(lexer, element, node, Report.ELEMENT_NOT_EMPTY);
1734	0	lexer.ungetToken();
1735		}
1736		}
1737		}
1738		}
1739
1740		/**
1741		* Parser for DEFLIST.
1742		*/
1743		public static class ParseDefList implements Parser
1744		{
1745
1746		/**
1747		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1748		*/
1749	28	public void parse(Lexer lexer, Node list, short mode)
1750		{
1751	28	Node node, parent;
1752	28	TagTable tt = lexer.configuration.tt;
1753
1754	28	if ((list.tag.model & Dict.CM_EMPTY) != 0)
1755		{
1756	0	return;
1757		}
1758
1759	28	lexer.insert = -1; // defer implicit inline start tags
1760
1761	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
1762		{
1763	143	if (node.tag == list.tag && node.type == Node.END_TAG)
1764		{
1765	26	list.closed = true;
1766	26	Node.trimEmptyElement(lexer, list);
1767	26	return;
1768		}
1769
1770		// deal with comments etc.
1771	117	if (Node.insertMisc(list, node))
1772		{
1773	0	continue;
1774		}
1775
1776	117	if (node.type == Node.TEXT_NODE)
1777		{
1778	0	lexer.ungetToken();
1779	0	node = lexer.inferredTag("dt");
1780	0	lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1781		}
1782
1783	117	if (node.tag == null)
1784		{
1785	0	lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1786	0	continue;
1787		}
1788
1789		// if this is the end tag for an ancestor element then infer end tag for this element
1790
1791	117	if (node.type == Node.END_TAG)
1792		{
1793	2	if (node.tag == tt.tagForm)
1794		{
1795	0	badForm(lexer);
1796	0	lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1797	0	continue;
1798		}
1799
1800	2	for (parent = list.parent; parent != null; parent = parent.parent)
1801		{
1802	5	if (node.tag == parent.tag)
1803		{
1804	1	lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);
1805
1806	1	lexer.ungetToken();
1807	1	Node.trimEmptyElement(lexer, list);
1808	1	return;
1809		}
1810		}
1811		}
1812
1813		// center in a dt or a dl breaks the dl list in two
1814	116	if (node.tag == tt.tagCenter)
1815		{
1816	9	if (list.content != null)
1817		{
1818	1	list.insertNodeAfterElement(node);
1819		}
1820		else
1821		{
1822		// trim empty dl list
1823	8	Node.insertNodeBeforeElement(list, node);
1824
1825		// #540296 tidy dumps with empty definition list
1826	8	Node.discardElement(list);
1827		}
1828
1829		// and parse contents of center
1830	9	parseTag(lexer, node, mode);
1831
1832		// now create a new dl element
1833	9	list = lexer.inferredTag("dl");
1834	9	node.insertNodeAfterElement(list);
1835	9	continue;
1836		}
1837
1838	107	if (!(node.tag == tt.tagDt \|\| node.tag == tt.tagDd))
1839		{
1840	50	lexer.ungetToken();
1841
1842	50	if (!((node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0))
1843		{
1844	0	lexer.report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
1845	0	Node.trimEmptyElement(lexer, list);
1846	0	return;
1847		}
1848
1849		// if DD appeared directly in BODY then exclude blocks
1850	50	if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
1851		{
1852	1	Node.trimEmptyElement(lexer, list);
1853	1	return;
1854		}
1855
1856	49	node = lexer.inferredTag("dd");
1857	49	lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
1858		}
1859
1860	106	if (node.type == Node.END_TAG)
1861		{
1862	0	lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
1863	0	continue;
1864		}
1865
1866		// node should be <DT> or <DD>
1867	106	list.insertNodeAtEnd(node);
1868	106	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
1869		}
1870
1871	0	lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
1872	0	Node.trimEmptyElement(lexer, list);
1873		}
1874
1875		}
1876
1877		/**
1878		* Parser for PRE.
1879		*/
1880		public static class ParsePre implements Parser
1881		{
1882
1883		/**
1884		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
1885		*/
1886	5	public void parse(Lexer lexer, Node pre, short mode)
1887		{
1888	5	Node node;
1889	5	TagTable tt = lexer.configuration.tt;
1890
1891	5	if ((pre.tag.model & Dict.CM_EMPTY) != 0)
1892		{
1893	0	return;
1894		}
1895
1896	5	if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
1897		{
1898	0	Node.coerceNode(lexer, pre, tt.tagPre);
1899		}
1900
1901	5	lexer.inlineDup(null); // tell lexer to insert inlines if needed
1902
1903	?	while ((node = lexer.getToken(Lexer.PREFORMATTED)) != null)
1904		{
1905	24	if (node.tag == pre.tag && node.type == Node.END_TAG)
1906		{
1907	3	Node.trimSpaces(lexer, pre);
1908	3	pre.closed = true;
1909	3	Node.trimEmptyElement(lexer, pre);
1910	3	return;
1911		}
1912
1913	21	if (node.tag == tt.tagHtml)
1914		{
1915	0	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
1916		{
1917	0	lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1918		}
1919
1920	0	continue;
1921		}
1922
1923	21	if (node.type == Node.TEXT_NODE)
1924		{
1925		// if first check for inital newline
1926	10	if (pre.content == null)
1927		{
1928	3	if (node.textarray[node.start] == (byte) '\n')
1929		{
1930	0	++node.start;
1931		}
1932
1933	3	if (node.start >= node.end)
1934		{
1935	0	continue;
1936		}
1937		}
1938
1939	10	pre.insertNodeAtEnd(node);
1940	10	continue;
1941		}
1942
1943		// deal with comments etc.
1944	11	if (Node.insertMisc(pre, node))
1945		{
1946	1	continue;
1947		}
1948
1949		// strip unexpected tags
1950	10	if (!lexer.preContent(node))
1951		{
1952	4	Node newnode;
1953
1954	4	lexer.report.warning(lexer, pre, node, Report.UNESCAPED_ELEMENT);
1955	4	newnode = Node.escapeTag(lexer, node);
1956	4	pre.insertNodeAtEnd(newnode);
1957	4	continue;
1958		}
1959
1960	6	if (node.tag == tt.tagP)
1961		{
1962	0	if (node.type == Node.START_TAG)
1963		{
1964	0	lexer.report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);
1965
1966		// trim white space before <p> in <pre>
1967	0	Node.trimSpaces(lexer, pre);
1968
1969		// coerce both <p> and </p> to <br>
1970	0	Node.coerceNode(lexer, node, tt.tagBr);
1971	0	pre.insertNodeAtEnd(node);
1972		}
1973		else
1974		{
1975	0	lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1976		}
1977	0	continue;
1978		}
1979
1980	6	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
1981		{
1982		// trim white space before <br>
1983	6	if (node.tag == tt.tagBr)
1984		{
1985	1	Node.trimSpaces(lexer, pre);
1986		}
1987
1988	6	pre.insertNodeAtEnd(node);
1989	6	parseTag(lexer, node, Lexer.PREFORMATTED);
1990	6	continue;
1991		}
1992
1993		// discard unexpected tags
1994	0	lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
1995		}
1996
1997	2	lexer.report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
1998	2	Node.trimEmptyElement(lexer, pre);
1999		}
2000
2001		}
2002
2003		/**
2004		* Parser for block elements.
2005		*/
2006		public static class ParseBlock implements Parser
2007		{
2008
2009		/**
2010		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2011		*/
2012	1473	public void parse(Lexer lexer, Node element, short mode)
2013		{
2014		// element is node created by the lexer upon seeing the start tag, or by the parser when the start tag is
2015		// inferred.
2016	1473	Node node, parent;
2017	1473	boolean checkstack;
2018	1473	int istackbase = 0;
2019	1473	TagTable tt = lexer.configuration.tt;
2020
2021	1473	checkstack = true;
2022
2023	1473	if ((element.tag.model & Dict.CM_EMPTY) != 0)
2024		{
2025	0	return;
2026		}
2027
2028	1473	if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
2029		{
2030	1	lexer.report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
2031		}
2032
2033		// InlineDup() asks the lexer to insert inline emphasis tags currently pushed on the istack, but take care
2034		// to avoid propagating inline emphasis inside OBJECT or APPLET. For these elements a fresh inline stack
2035		// context is created and disposed of upon reaching the end of the element. They thus behave like table
2036		// cells in this respect.
2037
2038	1473	if ((element.tag.model & Dict.CM_OBJECT) != 0)
2039		{
2040	2	istackbase = lexer.istackbase;
2041	2	lexer.istackbase = lexer.istack.size();
2042		}
2043
2044	1473	if (!((element.tag.model & Dict.CM_MIXED) != 0))
2045		{
2046	1469	lexer.inlineDup(null);
2047		}
2048
2049	1473	mode = Lexer.IGNORE_WHITESPACE;
2050
2051	?	while ((node = lexer.getToken(mode)) != null)
2052		{
2053		// end tag for this element
2054	4085	if (node.type == Node.END_TAG
2055		&& node.tag != null
2056		&& (node.tag == element.tag \|\| element.was == node.tag))
2057		{
2058
2059	1374	if ((element.tag.model & Dict.CM_OBJECT) != 0)
2060		{
2061		// pop inline stack
2062	2	while (lexer.istack.size() > lexer.istackbase)
2063		{
2064	0	lexer.popInline(null);
2065		}
2066	2	lexer.istackbase = istackbase;
2067		}
2068
2069	1374	element.closed = true;
2070	1374	Node.trimSpaces(lexer, element);
2071	1374	Node.trimEmptyElement(lexer, element);
2072	1374	return;
2073		}
2074
2075	2711	if (node.tag == tt.tagHtml \|\| node.tag == tt.tagHead \|\| node.tag == tt.tagBody)
2076		{
2077	12	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
2078		{
2079	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2080		}
2081
2082	12	continue;
2083		}
2084
2085	2699	if (node.type == Node.END_TAG)
2086		{
2087	41	if (node.tag == null)
2088		{
2089	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2090
2091	0	continue;
2092		}
2093	41	else if (node.tag == tt.tagBr)
2094		{
2095	2	node.type = Node.START_TAG;
2096		}
2097	39	else if (node.tag == tt.tagP)
2098		{
2099	0	Node.coerceNode(lexer, node, tt.tagBr);
2100	0	element.insertNodeAtEnd(node);
2101	0	node = lexer.inferredTag("br");
2102		}
2103		else
2104		{
2105		// if this is the end tag for an ancestor element then infer end tag for this element
2106
2107	39	for (parent = element.parent; parent != null; parent = parent.parent)
2108		{
2109	79	if (node.tag == parent.tag)
2110		{
2111	27	if (!((element.tag.model & Dict.CM_OPT) != 0))
2112		{
2113	2	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
2114		}
2115
2116	27	lexer.ungetToken();
2117
2118	27	if ((element.tag.model & Dict.CM_OBJECT) != 0)
2119		{
2120		// pop inline stack
2121	0	while (lexer.istack.size() > lexer.istackbase)
2122		{
2123	0	lexer.popInline(null);
2124		}
2125	0	lexer.istackbase = istackbase;
2126		}
2127
2128	27	Node.trimSpaces(lexer, element);
2129	27	Node.trimEmptyElement(lexer, element);
2130	27	return;
2131		}
2132		}
2133		// special case </tr> etc. for stuff moved in front of table
2134	12	if (lexer.exiled && node.tag.model != 0 && (node.tag.model & Dict.CM_TABLE) != 0)
2135		{
2136	0	lexer.ungetToken();
2137	0	Node.trimSpaces(lexer, element);
2138	0	Node.trimEmptyElement(lexer, element);
2139	0	return;
2140		}
2141		}
2142		}
2143
2144		// mixed content model permits text
2145	2672	if (node.type == Node.TEXT_NODE)
2146		{
2147	1506	boolean iswhitenode = false;
2148
2149	1506	if (node.type == Node.TEXT_NODE
2150		&& node.end <= node.start + 1
2151		&& lexer.lexbuf[node.start] == (byte) ' ')
2152		{
2153	371	iswhitenode = true;
2154		}
2155
2156	1506	if (lexer.configuration.encloseBlockText && !iswhitenode)
2157		{
2158	0	lexer.ungetToken();
2159	0	node = lexer.inferredTag("p");
2160	0	element.insertNodeAtEnd(node);
2161	0	parseTag(lexer, node, Lexer.MIXED_CONTENT);
2162	0	continue;
2163		}
2164
2165	1506	if (checkstack)
2166		{
2167	1105	checkstack = false;
2168
2169	1105	if (!((element.tag.model & Dict.CM_MIXED) != 0))
2170		{
2171	1103	if (lexer.inlineDup(node) > 0)
2172		{
2173	0	continue;
2174		}
2175		}
2176		}
2177
2178	1506	element.insertNodeAtEnd(node);
2179	1506	mode = Lexer.MIXED_CONTENT;
2180
2181		// HTML4 strict doesn't allow mixed content for elements with %block; as their content model
2182		// But only body, map, blockquote, form and noscript have content model %block;
2183	1506	if (element.tag == tt.tagBody
2184		\|\| element.tag == tt.tagMap
2185		\|\| element.tag == tt.tagBlockquote
2186		\|\| element.tag == tt.tagForm
2187		\|\| element.tag == tt.tagNoscript)
2188		{
2189	39	lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
2190		}
2191	1506	continue;
2192		}
2193
2194	1166	if (Node.insertMisc(element, node))
2195		{
2196	37	continue;
2197		}
2198
2199		// allow PARAM elements?
2200	1129	if (node.tag == tt.tagParam)
2201		{
2202	3	if (((element.tag.model & Dict.CM_PARAM) != 0)
2203		&& (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG))
2204		{
2205	3	element.insertNodeAtEnd(node);
2206	3	continue;
2207		}
2208
2209		// otherwise discard it
2210	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2211	0	continue;
2212		}
2213
2214		// allow AREA elements?
2215	1126	if (node.tag == tt.tagArea)
2216		{
2217	0	if ((element.tag == tt.tagMap) && (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG))
2218		{
2219	0	element.insertNodeAtEnd(node);
2220	0	continue;
2221		}
2222
2223		// otherwise discard it
2224	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2225	0	continue;
2226		}
2227
2228		// ignore unknown start/end tags
2229	1126	if (node.tag == null)
2230		{
2231	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2232	0	continue;
2233		}
2234
2235		// Allow Dict.CM_INLINE elements here. Allow Dict.CM_BLOCK elements here unless lexer.excludeBlocks is
2236		// yes. LI and DD are special cased. Otherwise infer end tag for this element.
2237
2238	1126	if (!((node.tag.model & Dict.CM_INLINE) != 0))
2239		{
2240	449	if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
2241		{
2242	6	if (node.tag == tt.tagForm)
2243		{
2244	0	badForm(lexer);
2245		}
2246	6	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2247	6	continue;
2248		}
2249
2250		// #427671 - Fix by Randy Waki - 10 Aug 00
2251		// If an LI contains an illegal FRAME, FRAMESET, OPTGROUP, or OPTION start tag, discard the start
2252		// tag and let the subsequent content get parsed as content of the enclosing LI. This seems to
2253		// mimic IE and Netscape, and avoids an infinite loop: without this check, ParseBlock (which is
2254		// parsing the LI's content) and ParseList (which is parsing the LI's parent's content) repeatedly
2255		// defer to each other to parse the illegal start tag, each time inferring a missing </li> or <li>
2256		// respectively. NOTE: This check is a bit fragile. It specifically checks for the four tags that
2257		// happen to weave their way through the current series of tests performed by ParseBlock and
2258		// ParseList to trigger the infinite loop.
2259
2260	443	if (element.tag == tt.tagLi)
2261		{
2262	48	if (node.tag == tt.tagFrame
2263		\|\| node.tag == tt.tagFrameset
2264		\|\| node.tag == tt.tagOptgroup
2265		\|\| node.tag == tt.tagOption)
2266		{
2267	4	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2268	4	continue;
2269		}
2270		}
2271
2272	439	if (element.tag == tt.tagTd \|\| element.tag == tt.tagTh)
2273		{
2274		// if parent is a table cell, avoid inferring the end of the cell
2275
2276	103	if ((node.tag.model & Dict.CM_HEAD) != 0)
2277		{
2278	0	moveToHead(lexer, element, node);
2279	0	continue;
2280		}
2281
2282	103	if ((node.tag.model & Dict.CM_LIST) != 0)
2283		{
2284	0	lexer.ungetToken();
2285	0	node = lexer.inferredTag("ul");
2286	0	node.addClass("noindent");
2287	0	lexer.excludeBlocks = true;
2288		}
2289	103	else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
2290		{
2291	0	lexer.ungetToken();
2292	0	node = lexer.inferredTag("dl");
2293	0	lexer.excludeBlocks = true;
2294		}
2295
2296		// infer end of current table cell
2297	103	if (!((node.tag.model & Dict.CM_BLOCK) != 0))
2298		{
2299	4	lexer.ungetToken();
2300	4	Node.trimSpaces(lexer, element);
2301	4	Node.trimEmptyElement(lexer, element);
2302	4	return;
2303		}
2304		}
2305	336	else if ((node.tag.model & Dict.CM_BLOCK) != 0)
2306		{
2307	280	if (lexer.excludeBlocks)
2308		{
2309	3	if (!((element.tag.model & Dict.CM_OPT) != 0))
2310		{
2311	1	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
2312		}
2313
2314	3	lexer.ungetToken();
2315
2316	3	if ((element.tag.model & Dict.CM_OBJECT) != 0)
2317		{
2318	0	lexer.istackbase = istackbase;
2319		}
2320
2321	3	Node.trimSpaces(lexer, element);
2322	3	Node.trimEmptyElement(lexer, element);
2323	3	return;
2324		}
2325		}
2326		else
2327		{
2328		// things like list items
2329
2330	56	if ((node.tag.model & Dict.CM_HEAD) != 0)
2331		{
2332	0	moveToHead(lexer, element, node);
2333	0	continue;
2334		}
2335
2336		// special case where a form start tag occurs in a tr and is followed by td or th
2337	56	if (element.tag == tt.tagForm && element.parent.tag == tt.tagTd && element.parent.implicit)
2338		{
2339	1	if (node.tag == tt.tagTd)
2340		{
2341	1	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2342	1	continue;
2343		}
2344
2345	0	if (node.tag == tt.tagTh)
2346		{
2347	0	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2348	0	node = element.parent;
2349	0	node.element = "th";
2350	0	node.tag = tt.tagTh;
2351	0	continue;
2352		}
2353		}
2354
2355	55	if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
2356		{
2357	2	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
2358		}
2359
2360	55	lexer.ungetToken();
2361
2362	55	if ((node.tag.model & Dict.CM_LIST) != 0)
2363		{
2364	16	if (element.parent != null
2365		&& element.parent.tag != null
2366		&& element.parent.tag.getParser() == LIST)
2367		{
2368	16	Node.trimSpaces(lexer, element);
2369	16	Node.trimEmptyElement(lexer, element);
2370	16	return;
2371		}
2372
2373	0	node = lexer.inferredTag("ul");
2374	0	node.addClass("noindent");
2375		}
2376	39	else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
2377		{
2378	37	if (element.parent.tag == tt.tagDl)
2379		{
2380	36	Node.trimSpaces(lexer, element);
2381	36	Node.trimEmptyElement(lexer, element);
2382	36	return;
2383		}
2384
2385	1	node = lexer.inferredTag("dl");
2386		}
2387	2	else if ((node.tag.model & Dict.CM_TABLE) != 0 \|\| (node.tag.model & Dict.CM_ROW) != 0)
2388		{
2389	1	node = lexer.inferredTag("table");
2390		}
2391	1	else if ((element.tag.model & Dict.CM_OBJECT) != 0)
2392		{
2393		// pop inline stack
2394	0	while (lexer.istack.size() > lexer.istackbase)
2395		{
2396	0	lexer.popInline(null);
2397		}
2398	0	lexer.istackbase = istackbase;
2399	0	Node.trimSpaces(lexer, element);
2400	0	Node.trimEmptyElement(lexer, element);
2401	0	return;
2402
2403		}
2404		else
2405		{
2406	1	Node.trimSpaces(lexer, element);
2407	1	Node.trimEmptyElement(lexer, element);
2408	1	return;
2409		}
2410		}
2411		}
2412
2413		// parse known element
2414	1055	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
2415		{
2416	1049	if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
2417		{
2418		// DSR - 27Apr02 ensure we wrap anchors and other inline content
2419		// fgiust: commented out due to [1403105]: java.lang.StackOverflowError in Tidy.parseDOM()
2420		// if (lexer.configuration.encloseBlockText)
2421		// {
2422		// lexer.ungetToken();
2423		// node = lexer.inferredTag("p");
2424		// element.insertNodeAtEnd(node);
2425		// parseTag(lexer, node, Lexer.MIXED_CONTENT);
2426		// continue;
2427		// }
2428
2429	671	if (checkstack && !node.implicit)
2430		{
2431	219	checkstack = false;
2432
2433		// #431731 - fix by Randy Waki 25 Dec 00
2434	219	if (!TidyUtils.toBoolean(element.tag.model & Dict.CM_MIXED))
2435		{
2436	218	if (lexer.inlineDup(node) > 0)
2437		{
2438	6	continue;
2439		}
2440		}
2441		}
2442
2443	665	mode = Lexer.MIXED_CONTENT;
2444		}
2445		else
2446		{
2447	378	checkstack = true;
2448	378	mode = Lexer.IGNORE_WHITESPACE;
2449		}
2450
2451		// trim white space before <br>
2452	1043	if (node.tag == tt.tagBr)
2453		{
2454	121	Node.trimSpaces(lexer, element);
2455		}
2456
2457	1043	element.insertNodeAtEnd(node);
2458
2459	1043	if (node.implicit)
2460		{
2461	32	lexer.report.warning(lexer, element, node, Report.INSERTING_TAG);
2462		}
2463
2464	1043	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE // Lexer.MixedContent
2465		);
2466	1043	continue;
2467		}
2468
2469		// discard unexpected tags
2470	6	if (node.type == Node.END_TAG)
2471		{
2472	6	lexer.popInline(node); // if inline end tag
2473		}
2474
2475	6	lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
2476	6	continue;
2477		}
2478
2479	12	if (!((element.tag.model & Dict.CM_OPT) != 0))
2480		{
2481	8	lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
2482		}
2483
2484	12	if ((element.tag.model & Dict.CM_OBJECT) != 0)
2485		{
2486		// pop inline stack
2487	0	while (lexer.istack.size() > lexer.istackbase)
2488		{
2489	0	lexer.popInline(null);
2490		}
2491	0	lexer.istackbase = istackbase;
2492		}
2493
2494	12	Node.trimSpaces(lexer, element);
2495	12	Node.trimEmptyElement(lexer, element);
2496		}
2497
2498		}
2499
2500		/**
2501		* Parser for TABLE.
2502		*/
2503		public static class ParseTableTag implements Parser
2504		{
2505
2506		/**
2507		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2508		*/
2509	71	public void parse(Lexer lexer, Node table, short mode)
2510		{
2511	71	Node node, parent;
2512	71	int istackbase;
2513	71	TagTable tt = lexer.configuration.tt;
2514
2515	71	lexer.deferDup();
2516	71	istackbase = lexer.istackbase;
2517	71	lexer.istackbase = lexer.istack.size();
2518
2519	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2520		{
2521	180	if (node.tag == table.tag && node.type == Node.END_TAG)
2522		{
2523	64	lexer.istackbase = istackbase;
2524	64	table.closed = true;
2525	64	Node.trimEmptyElement(lexer, table);
2526	64	return;
2527		}
2528
2529		// deal with comments etc.
2530	116	if (Node.insertMisc(table, node))
2531		{
2532	2	continue;
2533		}
2534
2535		// discard unknown tags
2536	114	if (node.tag == null && node.type != Node.TEXT_NODE)
2537		{
2538	0	lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2539	0	continue;
2540		}
2541
2542		// if TD or TH or text or inline or block then infer <TR>
2543
2544	114	if (node.type != Node.END_TAG)
2545		{
2546	107	if (node.tag == tt.tagTd \|\| node.tag == tt.tagTh \|\| node.tag == tt.tagTable)
2547		{
2548	2	lexer.ungetToken();
2549	2	node = lexer.inferredTag("tr");
2550	2	lexer.report.warning(lexer, table, node, Report.MISSING_STARTTAG);
2551		}
2552	105	else if (node.type == Node.TEXT_NODE \|\| (node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0)
2553		{
2554	2	Node.insertNodeBeforeElement(table, node);
2555	2	lexer.report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2556	2	lexer.exiled = true;
2557
2558	2	if (!(node.type == Node.TEXT_NODE)) // #427662 - was (!node.type == TextNode) - fix by Young
2559		{
2560	2	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2561		}
2562
2563	2	lexer.exiled = false;
2564	2	continue;
2565		}
2566	103	else if ((node.tag.model & Dict.CM_HEAD) != 0)
2567		{
2568	0	moveToHead(lexer, table, node);
2569	0	continue;
2570		}
2571		}
2572
2573		// if this is the end tag for an ancestor element then infer end tag for this element
2574
2575	112	if (node.type == Node.END_TAG)
2576		{
2577	7	if (node.tag == tt.tagForm
2578		\|\| (node.tag != null && ((node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0)))
2579		{
2580	1	badForm(lexer);
2581	1	lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2582	1	continue;
2583		}
2584
2585	6	if ((node.tag != null && (node.tag.model & (Dict.CM_TABLE \| Dict.CM_ROW)) != 0)
2586		\|\| (node.tag != null && (node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0))
2587		{
2588	2	lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2589	2	continue;
2590		}
2591
2592	4	for (parent = table.parent; parent != null; parent = parent.parent)
2593		{
2594	4	if (node.tag == parent.tag)
2595		{
2596	4	lexer.report.warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);
2597	4	lexer.ungetToken();
2598	4	lexer.istackbase = istackbase;
2599	4	Node.trimEmptyElement(lexer, table);
2600	4	return;
2601		}
2602		}
2603		}
2604
2605	105	if (!((node.tag.model & Dict.CM_TABLE) != 0))
2606		{
2607	0	lexer.ungetToken();
2608	0	lexer.report.warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
2609	0	lexer.istackbase = istackbase;
2610	0	Node.trimEmptyElement(lexer, table);
2611	0	return;
2612		}
2613
2614	105	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
2615		{
2616	105	table.insertNodeAtEnd(node);
2617
2618	105	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2619	105	continue;
2620		}
2621
2622		// discard unexpected text nodes and end tags
2623	0	lexer.report.warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
2624		}
2625
2626	3	lexer.report.warning(lexer, table, node, Report.MISSING_ENDTAG_FOR);
2627	3	Node.trimEmptyElement(lexer, table);
2628	3	lexer.istackbase = istackbase;
2629		}
2630
2631		}
2632
2633		/**
2634		* Parser for COLGROUP.
2635		*/
2636		public static class ParseColGroup implements Parser
2637		{
2638
2639		/**
2640		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2641		*/
2642	6	public void parse(Lexer lexer, Node colgroup, short mode)
2643		{
2644	6	Node node, parent;
2645	6	TagTable tt = lexer.configuration.tt;
2646
2647	6	if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
2648		{
2649	0	return;
2650		}
2651
2652	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2653		{
2654	6	if (node.tag == colgroup.tag && node.type == Node.END_TAG)
2655		{
2656	0	colgroup.closed = true;
2657	0	return;
2658		}
2659
2660		// if this is the end tag for an ancestor element then infer end tag for this element
2661
2662	6	if (node.type == Node.END_TAG)
2663		{
2664	0	if (node.tag == tt.tagForm)
2665		{
2666	0	badForm(lexer);
2667	0	lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2668	0	continue;
2669		}
2670
2671	0	for (parent = colgroup.parent; parent != null; parent = parent.parent)
2672		{
2673
2674	0	if (node.tag == parent.tag)
2675		{
2676	0	lexer.ungetToken();
2677	0	return;
2678		}
2679		}
2680		}
2681
2682	6	if (node.type == Node.TEXT_NODE)
2683		{
2684	0	lexer.ungetToken();
2685	0	return;
2686		}
2687
2688		// deal with comments etc.
2689	6	if (Node.insertMisc(colgroup, node))
2690		{
2691	0	continue;
2692		}
2693
2694		// discard unknown tags
2695	6	if (node.tag == null)
2696		{
2697	0	lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2698	0	continue;
2699		}
2700
2701	6	if (node.tag != tt.tagCol)
2702		{
2703	6	lexer.ungetToken();
2704	6	return;
2705		}
2706
2707	0	if (node.type == Node.END_TAG)
2708		{
2709	0	lexer.report.warning(lexer, colgroup, node, Report.DISCARDING_UNEXPECTED);
2710	0	continue;
2711		}
2712
2713		// node should be <COL>
2714	0	colgroup.insertNodeAtEnd(node);
2715	0	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2716		}
2717		}
2718
2719		}
2720
2721		/**
2722		* Parser for ROWGROUP.
2723		*/
2724		public static class ParseRowGroup implements Parser
2725		{
2726
2727		/**
2728		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2729		*/
2730	4	public void parse(Lexer lexer, Node rowgroup, short mode)
2731		{
2732	4	Node node, parent;
2733	4	TagTable tt = lexer.configuration.tt;
2734
2735	4	if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
2736		{
2737	0	return;
2738		}
2739
2740	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2741		{
2742	136	if (node.tag == rowgroup.tag)
2743		{
2744	4	if (node.type == Node.END_TAG)
2745		{
2746	4	rowgroup.closed = true;
2747	4	Node.trimEmptyElement(lexer, rowgroup);
2748	4	return;
2749		}
2750
2751	0	lexer.ungetToken();
2752	0	return;
2753		}
2754
2755		// if </table> infer end tag
2756	132	if (node.tag == tt.tagTable && node.type == Node.END_TAG)
2757		{
2758	0	lexer.ungetToken();
2759	0	Node.trimEmptyElement(lexer, rowgroup);
2760	0	return;
2761		}
2762
2763		// deal with comments etc.
2764	132	if (Node.insertMisc(rowgroup, node))
2765		{
2766	0	continue;
2767		}
2768
2769		// discard unknown tags
2770	132	if (node.tag == null && node.type != Node.TEXT_NODE)
2771		{
2772	0	lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2773	0	continue;
2774		}
2775
2776		// if TD or TH then infer <TR> if text or inline or block move before table if head content move to
2777		// head
2778
2779	132	if (node.type != Node.END_TAG)
2780		{
2781	132	if (node.tag == tt.tagTd \|\| node.tag == tt.tagTh)
2782		{
2783	0	lexer.ungetToken();
2784	0	node = lexer.inferredTag("tr");
2785	0	lexer.report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2786		}
2787	132	else if (node.type == Node.TEXT_NODE \|\| (node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0)
2788		{
2789	0	Node.moveBeforeTable(rowgroup, node, tt);
2790	0	lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2791	0	lexer.exiled = true;
2792
2793		// #427662 was (!node.type == TextNode) fix by Young 04 Aug 00
2794	0	if (node.type != Node.TEXT_NODE)
2795		{
2796	0	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2797		}
2798
2799	0	lexer.exiled = false;
2800	0	continue;
2801		}
2802	132	else if ((node.tag.model & Dict.CM_HEAD) != 0)
2803		{
2804	0	lexer.report.warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
2805	0	moveToHead(lexer, rowgroup, node);
2806	0	continue;
2807		}
2808		}
2809
2810		// if this is the end tag for ancestor element then infer end tag for this element
2811
2812	132	if (node.type == Node.END_TAG)
2813		{
2814
2815	0	if (node.tag == tt.tagForm
2816		\|\| (node.tag != null && (node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0))
2817		{
2818	0	if (node.tag == tt.tagForm)
2819		{
2820	0	badForm(lexer);
2821		}
2822	0	lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2823	0	continue;
2824		}
2825
2826	0	if (node.tag == tt.tagTr \|\| node.tag == tt.tagTd \|\| node.tag == tt.tagTh)
2827		{
2828	0	lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2829	0	continue;
2830		}
2831
2832	0	for (parent = rowgroup.parent; parent != null; parent = parent.parent)
2833		{
2834	0	if (node.tag == parent.tag)
2835		{
2836	0	lexer.ungetToken();
2837	0	Node.trimEmptyElement(lexer, rowgroup);
2838	0	return;
2839		}
2840		}
2841
2842		}
2843
2844		// if THEAD, TFOOT or TBODY then implied end tag
2845
2846	132	if ((node.tag.model & Dict.CM_ROWGRP) != 0)
2847		{
2848	0	if (node.type != Node.END_TAG)
2849		{
2850	0	lexer.ungetToken();
2851		}
2852
2853	0	Node.trimEmptyElement(lexer, rowgroup);
2854	0	return;
2855		}
2856
2857	132	if (node.type == Node.END_TAG)
2858		{
2859	0	lexer.report.warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
2860	0	continue;
2861		}
2862
2863	132	if (!(node.tag == tt.tagTr))
2864		{
2865	0	node = lexer.inferredTag("tr");
2866	0	lexer.report.warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
2867	0	lexer.ungetToken();
2868		}
2869
2870		// node should be <TR>
2871	132	rowgroup.insertNodeAtEnd(node);
2872	132	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2873		}
2874	0	Node.trimEmptyElement(lexer, rowgroup);
2875		}
2876		}
2877
2878		/**
2879		* Parser for ROW.
2880		*/
2881		public static class ParseRow implements Parser
2882		{
2883
2884		/**
2885		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
2886		*/
2887	226	public void parse(Lexer lexer, Node row, short mode)
2888		{
2889	226	Node node, parent;
2890	226	boolean excludeState;
2891	226	TagTable tt = lexer.configuration.tt;
2892
2893	226	if ((row.tag.model & Dict.CM_EMPTY) != 0)
2894		{
2895	0	return;
2896		}
2897
2898	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
2899		{
2900	1312	if (node.tag == row.tag)
2901		{
2902	219	if (node.type == Node.END_TAG)
2903		{
2904	215	row.closed = true;
2905	215	Node.fixEmptyRow(lexer, row);
2906	215	return;
2907		}
2908
2909	4	lexer.ungetToken();
2910	4	Node.fixEmptyRow(lexer, row);
2911	4	return;
2912		}
2913
2914		// if this is the end tag for an ancestor element then infer end tag for this element
2915	1093	if (node.type == Node.END_TAG)
2916		{
2917	15	if (node.tag == tt.tagForm
2918		\|\| (node.tag != null && (node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0))
2919		{
2920	10	if (node.tag == tt.tagForm)
2921		{
2922	2	badForm(lexer);
2923		}
2924	10	lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2925	10	continue;
2926		}
2927
2928	5	if (node.tag == tt.tagTd \|\| node.tag == tt.tagTh)
2929		{
2930	1	lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2931	1	continue;
2932		}
2933
2934	8	for (parent = row.parent; parent != null; parent = parent.parent)
2935		{
2936	8	if (node.tag == parent.tag)
2937		{
2938	4	lexer.ungetToken();
2939	4	Node.trimEmptyElement(lexer, row);
2940	4	return;
2941		}
2942		}
2943		}
2944
2945		// deal with comments etc.
2946	1078	if (Node.insertMisc(row, node))
2947		{
2948	4	continue;
2949		}
2950
2951		// discard unknown tags
2952	1074	if (node.tag == null && node.type != Node.TEXT_NODE)
2953		{
2954	0	lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2955	0	continue;
2956		}
2957
2958		// discard unexpected <table> element
2959	1074	if (node.tag == tt.tagTable)
2960		{
2961	2	lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2962	2	continue;
2963		}
2964
2965		// THEAD, TFOOT or TBODY
2966	1072	if (node.tag != null && (node.tag.model & Dict.CM_ROWGRP) != 0)
2967		{
2968	0	lexer.ungetToken();
2969	0	Node.trimEmptyElement(lexer, row);
2970	0	return;
2971		}
2972
2973	1072	if (node.type == Node.END_TAG)
2974		{
2975	0	lexer.report.warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
2976	0	continue;
2977		}
2978
2979		// if text or inline or block move before table if head content move to head
2980
2981	1072	if (node.type != Node.END_TAG)
2982		{
2983	1072	if (node.tag == tt.tagForm)
2984		{
2985	2	lexer.ungetToken();
2986	2	node = lexer.inferredTag("td");
2987	2	lexer.report.warning(lexer, row, node, Report.MISSING_STARTTAG);
2988		}
2989	1070	else if (node.type == Node.TEXT_NODE \|\| (node.tag.model & (Dict.CM_BLOCK \| Dict.CM_INLINE)) != 0)
2990		{
2991	5	Node.moveBeforeTable(row, node, tt);
2992	5	lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
2993	5	lexer.exiled = true;
2994
2995	5	if (node.type != Node.TEXT_NODE)
2996		{
2997	5	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
2998		}
2999
3000	5	lexer.exiled = false;
3001	5	continue;
3002		}
3003	1065	else if ((node.tag.model & Dict.CM_HEAD) != 0)
3004		{
3005	0	lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
3006	0	moveToHead(lexer, row, node);
3007	0	continue;
3008		}
3009		}
3010
3011	1067	if (!(node.tag == tt.tagTd \|\| node.tag == tt.tagTh))
3012		{
3013	0	lexer.report.warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
3014	0	continue;
3015		}
3016
3017		// node should be <TD> or <TH>
3018	1067	row.insertNodeAtEnd(node);
3019	1067	excludeState = lexer.excludeBlocks;
3020	1067	lexer.excludeBlocks = false;
3021	1067	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
3022	1067	lexer.excludeBlocks = excludeState;
3023
3024		// pop inline stack
3025
3026	1067	while (lexer.istack.size() > lexer.istackbase)
3027		{
3028	1	lexer.popInline(null);
3029		}
3030		}
3031
3032	3	Node.trimEmptyElement(lexer, row);
3033		}
3034
3035		}
3036
3037		/**
3038		* Parser for NOFRAMES.
3039		*/
3040		public static class ParseNoFrames implements Parser
3041		{
3042
3043		/**
3044		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3045		*/
3046	6	public void parse(Lexer lexer, Node noframes, short mode)
3047		{
3048	6	Node node;
3049	6	TagTable tt = lexer.configuration.tt;
3050
3051	6	lexer.badAccess \|= Report.USING_NOFRAMES;
3052	6	mode = Lexer.IGNORE_WHITESPACE;
3053
3054	?	while ((node = lexer.getToken(mode)) != null)
3055		{
3056	10	if (node.tag == noframes.tag && node.type == Node.END_TAG)
3057		{
3058	2	noframes.closed = true;
3059	2	Node.trimSpaces(lexer, noframes);
3060	2	return;
3061		}
3062
3063	8	if ((node.tag == tt.tagFrame \|\| node.tag == tt.tagFrameset))
3064		{
3065
3066	2	Node.trimSpaces(lexer, noframes);
3067
3068		// fix for [539369]
3069	2	if (node.type == Node.END_TAG)
3070		{
3071	1	lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
3072		// Throw it away
3073		}
3074		else
3075		{
3076	1	lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);
3077
3078	1	lexer.ungetToken();
3079		}
3080	2	return;
3081		}
3082
3083	6	if (node.tag == tt.tagHtml)
3084		{
3085	0	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
3086		{
3087	0	lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
3088		}
3089
3090	0	continue;
3091		}
3092
3093		// deal with comments etc.
3094	6	if (Node.insertMisc(noframes, node))
3095		{
3096	0	continue;
3097		}
3098
3099	6	if (node.tag == tt.tagBody && node.type == Node.START_TAG)
3100		{
3101	3	boolean seenbody = lexer.seenEndBody;
3102	3	noframes.insertNodeAtEnd(node);
3103	3	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); // MixedContent
3104
3105	3	if (seenbody)
3106		{
3107	1	Node.coerceNode(lexer, node, tt.tagDiv);
3108	1	moveNodeToBody(lexer, node);
3109		}
3110	3	continue;
3111		}
3112
3113		// implicit body element inferred
3114	3	if (node.type == Node.TEXT_NODE \|\| (node.tag != null && node.type != Node.END_TAG))
3115		{
3116	3	if (lexer.seenEndBody)
3117		{
3118	2	Node body = lexer.root.findBody(tt);
3119
3120	2	if (node.type == Node.TEXT_NODE)
3121		{
3122	2	lexer.ungetToken();
3123	2	node = lexer.inferredTag("p");
3124	2	lexer.report.warning(lexer, noframes, node, Report.CONTENT_AFTER_BODY);
3125		}
3126
3127	2	body.insertNodeAtEnd(node);
3128		}
3129		else
3130		{
3131	1	lexer.ungetToken();
3132	1	node = lexer.inferredTag("body");
3133	1	if (lexer.configuration.xmlOut)
3134		{
3135	0	lexer.report.warning(lexer, noframes, node, Report.INSERTING_TAG);
3136		}
3137	1	noframes.insertNodeAtEnd(node);
3138		}
3139	3	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
3140		// MixedContent
3141	3	continue;
3142		}
3143		// discard unexpected end tags
3144	0	lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
3145		}
3146
3147	2	lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
3148		}
3149
3150		}
3151
3152		/**
3153		* Parser for SELECT.
3154		*/
3155		public static class ParseSelect implements Parser
3156		{
3157
3158		/**
3159		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3160		*/
3161	7	public void parse(Lexer lexer, Node field, short mode)
3162		{
3163	7	Node node;
3164	7	TagTable tt = lexer.configuration.tt;
3165
3166	7	lexer.insert = -1; // defer implicit inline start tags
3167
3168	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3169		{
3170	31	if (node.tag == field.tag && node.type == Node.END_TAG)
3171		{
3172	7	field.closed = true;
3173	7	Node.trimSpaces(lexer, field);
3174	7	return;
3175		}
3176
3177		// deal with comments etc.
3178	24	if (Node.insertMisc(field, node))
3179		{
3180	2	continue;
3181		}
3182
3183	22	if (node.type == Node.START_TAG
3184		&& (node.tag == tt.tagOption \|\| node.tag == tt.tagOptgroup \|\| node.tag == tt.tagScript))
3185		{
3186	22	field.insertNodeAtEnd(node);
3187	22	parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
3188	22	continue;
3189		}
3190
3191		// discard unexpected tags
3192	0	lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
3193		}
3194
3195	0	lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
3196		}
3197
3198		}
3199
3200		/**
3201		* Parser for text nodes.
3202		*/
3203		public static class ParseText implements Parser
3204		{
3205
3206		/**
3207		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3208		*/
3209	28	public void parse(Lexer lexer, Node field, short mode)
3210		{
3211	28	Node node;
3212	28	TagTable tt = lexer.configuration.tt;
3213
3214	28	lexer.insert = -1; // defer implicit inline start tags
3215
3216	28	if (field.tag == tt.tagTextarea)
3217		{
3218	6	mode = Lexer.PREFORMATTED;
3219		}
3220		else
3221		{
3222	22	mode = Lexer.MIXED_CONTENT; // kludge for font tags
3223		}
3224
3225	?	while ((node = lexer.getToken(mode)) != null)
3226		{
3227	55	if (node.tag == field.tag && node.type == Node.END_TAG)
3228		{
3229	13	field.closed = true;
3230	13	Node.trimSpaces(lexer, field);
3231	13	return;
3232		}
3233
3234		// deal with comments etc.
3235	42	if (Node.insertMisc(field, node))
3236		{
3237	0	continue;
3238		}
3239
3240	42	if (node.type == Node.TEXT_NODE)
3241		{
3242		// only called for 1st child
3243	23	if (field.content == null && !((mode & Lexer.PREFORMATTED) != 0))
3244		{
3245	16	Node.trimSpaces(lexer, field);
3246		}
3247
3248	23	if (node.start >= node.end)
3249		{
3250	0	continue;
3251		}
3252
3253	23	field.insertNodeAtEnd(node);
3254	23	continue;
3255		}
3256
3257		// for textarea should all cases of < and & be escaped?
3258		// discard inline tags e.g. font
3259	19	if (node.tag != null
3260		&& ((node.tag.model & Dict.CM_INLINE) != 0)
3261		&& (node.tag.model & Dict.CM_FIELD) == 0) // #487283 - fix by Lee Passey 25 Jan 02
3262		{
3263	4	lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
3264	4	continue;
3265		}
3266
3267		// terminate element on other tags
3268	15	if (!((field.tag.model & Dict.CM_OPT) != 0))
3269		{
3270	0	lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_BEFORE);
3271		}
3272
3273	15	lexer.ungetToken();
3274	15	Node.trimSpaces(lexer, field);
3275	15	return;
3276		}
3277
3278	0	if (!((field.tag.model & Dict.CM_OPT) != 0))
3279		{
3280	0	lexer.report.warning(lexer, field, node, Report.MISSING_ENDTAG_FOR);
3281		}
3282		}
3283
3284		}
3285
3286		/**
3287		* Parser for OPTGROUP.
3288		*/
3289		public static class ParseOptGroup implements Parser
3290		{
3291
3292		/**
3293		* @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
3294		*/
3295	0	public void parse(Lexer lexer, Node field, short mode)
3296		{
3297	0	Node node;
3298	0	TagTable tt = lexer.configuration.tt;
3299
3300	0	lexer.insert = -1; // defer implicit inline start tags
3301
3302	0	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3303		{
3304	0	if (node.tag == field.tag && node.type == Node.END_TAG)
3305		{
3306	0	field.closed = true;
3307	0	Node.trimSpaces(lexer, field);
3308	0	return;
3309		}
3310
3311		// deal with comments etc.
3312	0	if (Node.insertMisc(field, node))
3313		{
3314	0	continue;
3315		}
3316
3317	0	if (node.type == Node.START_TAG && (node.tag == tt.tagOption \|\| node.tag == tt.tagOptgroup))
3318		{
3319	0	if (node.tag == tt.tagOptgroup)
3320		{
3321	0	lexer.report.warning(lexer, field, node, Report.CANT_BE_NESTED);
3322		}
3323
3324	0	field.insertNodeAtEnd(node);
3325	0	parseTag(lexer, node, Lexer.MIXED_CONTENT);
3326	0	continue;
3327		}
3328
3329		// discard unexpected tags
3330	0	lexer.report.warning(lexer, field, node, Report.DISCARDING_UNEXPECTED);
3331		}
3332		}
3333
3334		}
3335
3336		/**
3337		* HTML is the top level element.
3338		*/
3339	221	public static Node parseDocument(Lexer lexer)
3340		{
3341	221	Node node, document, html;
3342	221	Node doctype = null;
3343	221	TagTable tt = lexer.configuration.tt;
3344
3345	221	document = lexer.newNode();
3346	221	document.type = Node.ROOT_NODE;
3347
3348	221	lexer.root = document;
3349
3350	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3351		{
3352		// deal with comments etc.
3353	363	if (Node.insertMisc(document, node))
3354		{
3355	27	continue;
3356		}
3357
3358	336	if (node.type == Node.DOCTYPE_TAG)
3359		{
3360	115	if (doctype == null)
3361		{
3362	115	document.insertNodeAtEnd(node);
3363	115	doctype = node;
3364		}
3365		else
3366		{
3367	0	lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED);
3368		}
3369	115	continue;
3370		}
3371
3372	221	if (node.type == Node.END_TAG)
3373		{
3374	0	lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO?
3375	0	continue;
3376		}
3377
3378	221	if (node.type != Node.START_TAG \|\| node.tag != tt.tagHtml)
3379		{
3380	11	lexer.ungetToken();
3381	11	html = lexer.inferredTag("html");
3382		}
3383		else
3384		{
3385	210	html = node;
3386		}
3387
3388	221	if (document.findDocType() == null && !lexer.configuration.bodyOnly)
3389		{
3390	104	lexer.report.warning(lexer, null, null, Report.MISSING_DOCTYPE);
3391		}
3392
3393	221	document.insertNodeAtEnd(html);
3394	221	HTML.parse(lexer, html, (short) 0); // TODO?
3395	221	break;
3396		}
3397
3398	221	return document;
3399		}
3400
3401		/**
3402		* Indicates whether or not whitespace should be preserved for this element. If an <code>xml:space</code>
3403		* attribute is found, then if the attribute value is <code>preserve</code>, returns <code>true</code>. For
3404		* any other value, returns <code>false</code>. If an <code>xml:space</code> attribute was <em>not</em>
3405		* found, then the following element names result in a return value of <code>true:
3406		* pre, script, style,</code> and
3407		* <code>xsl:text</code>. Finally, if a <code>TagTable</code> was passed in and the element appears as the
3408		* "pre" element in the <code>TagTable</code>, then <code>true</code> will be returned. Otherwise,
3409		* <code>false</code> is returned.
3410		* @param element The <code>Node</code> to test to see if whitespace should be preserved.
3411		* @param tt The <code>TagTable</code> to test for the <code>getNodePre()</code> function. This may be
3412		* <code>null</code>, in which case this test is bypassed.
3413		* @return <code>true</code> or <code>false</code>, as explained above.
3414		*/
3415	81	public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
3416		{
3417	81	AttVal attribute;
3418
3419		// search attributes for xml:space
3420	81	for (attribute = element.attributes; attribute != null; attribute = attribute.next)
3421		{
3422	30	if (attribute.attribute.equals("xml:space"))
3423		{
3424	0	if (attribute.value.equals("preserve"))
3425		{
3426	0	return true;
3427		}
3428
3429	0	return false;
3430		}
3431		}
3432
3433	81	if (element.element == null) // Debian Bug #137124. Fix based on suggestion by Cesar Eduardo Barros 06 Mar 02
3434		{
3435	0	return false;
3436		}
3437
3438		// kludge for html docs without explicit xml:space attribute
3439	81	if ("pre".equalsIgnoreCase(element.element)
3440		\|\| "script".equalsIgnoreCase(element.element)
3441		\|\| "style".equalsIgnoreCase(element.element))
3442		{
3443	0	return true;
3444		}
3445
3446	81	if ((tt != null) && (tt.findParser(element) == PRE))
3447		{
3448	2	return true;
3449		}
3450
3451		// kludge for XSL docs
3452	79	if ("xsl:text".equalsIgnoreCase(element.element))
3453		{
3454	2	return true;
3455		}
3456
3457	77	return false;
3458		}
3459
3460		/**
3461		* XML documents.
3462		*/
3463	36	public static void parseXMLElement(Lexer lexer, Node element, short mode)
3464		{
3465	36	Node node;
3466
3467		// if node is pre or has xml:space="preserve" then do so
3468
3469	36	if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
3470		{
3471	2	mode = Lexer.PREFORMATTED;
3472		}
3473
3474	?	while ((node = lexer.getToken(mode)) != null)
3475		{
3476	69	if (node.type == Node.END_TAG && node.element.equals(element.element))
3477		{
3478	27	element.closed = true;
3479	27	break;
3480		}
3481
3482		// discard unexpected end tags
3483	42	if (node.type == Node.END_TAG)
3484		{
3485	1	lexer.report.error(lexer, element, node, Report.UNEXPECTED_ENDTAG);
3486	1	continue;
3487		}
3488
3489		// parse content on seeing start tag
3490	41	if (node.type == Node.START_TAG)
3491		{
3492	21	parseXMLElement(lexer, node, mode);
3493		}
3494
3495	41	element.insertNodeAtEnd(node);
3496		}
3497
3498		// if first child is text then trim initial space and delete text node if it is empty.
3499
3500	36	node = element.content;
3501
3502	36	if (node != null && node.type == Node.TEXT_NODE && mode != Lexer.PREFORMATTED)
3503		{
3504	13	if (node.textarray[node.start] == (byte) ' ')
3505		{
3506	0	node.start++;
3507
3508	0	if (node.start >= node.end)
3509		{
3510	0	Node.discardElement(node);
3511		}
3512		}
3513		}
3514
3515		// if last child is text then trim final space and delete the text node if it is empty
3516
3517	36	node = element.last;
3518
3519	36	if (node != null && node.type == Node.TEXT_NODE && mode != Lexer.PREFORMATTED)
3520		{
3521	13	if (node.textarray[node.end - 1] == (byte) ' ')
3522		{
3523	2	node.end--;
3524
3525	2	if (node.start >= node.end)
3526		{
3527	0	Node.discardElement(node);
3528		}
3529		}
3530		}
3531		}
3532
3533	17	public static Node parseXMLDocument(Lexer lexer)
3534		{
3535	17	Node node, document, doctype;
3536
3537	17	document = lexer.newNode();
3538	17	document.type = Node.ROOT_NODE;
3539	17	doctype = null;
3540	17	lexer.configuration.xmlTags = true;
3541
3542	?	while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
3543		{
3544		// discard unexpected end tags
3545	38	if (node.type == Node.END_TAG)
3546		{
3547	1	lexer.report.warning(lexer, null, node, Report.UNEXPECTED_ENDTAG);
3548	1	continue;
3549		}
3550
3551		// deal with comments etc.
3552	37	if (Node.insertMisc(document, node))
3553		{
3554	20	continue;
3555		}
3556
3557	17	if (node.type == Node.DOCTYPE_TAG)
3558		{
3559	1	if (doctype == null)
3560		{
3561	1	document.insertNodeAtEnd(node);
3562	1	doctype = node;
3563		}
3564		else
3565		{
3566	0	lexer.report.warning(lexer, document, node, Report.DISCARDING_UNEXPECTED); // TODO
3567		}
3568	1	continue;
3569		}
3570
3571	16	if (node.type == Node.START_END_TAG)
3572		{
3573	1	document.insertNodeAtEnd(node);
3574	1	continue;
3575		}
3576
3577		// if start tag then parse element's content
3578	15	if (node.type == Node.START_TAG)
3579		{
3580	15	document.insertNodeAtEnd(node);
3581	15	parseXMLElement(lexer, node, Lexer.IGNORE_WHITESPACE);
3582		}
3583
3584		}
3585
3586	17	if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
3587		{
3588	0	lexer.report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
3589		}
3590
3591		// ensure presence of initial <?XML version="1.0"?>
3592	17	if (lexer.configuration.xmlPi)
3593		{
3594	2	lexer.fixXmlDecl(document);
3595		}
3596
3597	17	return document;
3598		}
3599
3600		/**
3601		* errors in positioning of form start or end tags generally require human intervention to fix.
3602		*/
3603	3	static void badForm(Lexer lexer)
3604		{
3605	3	lexer.badForm = 1;
3606	3	lexer.errors++;
3607		}
3608
3609		}