Maven Clover report

Clover coverage report - Maven Clover report

Coverage timestamp: Tue Aug 1 2006 15:09:51 CEST

FRAMES NO FRAMES

file stats:	LOC:	2,408		Methods:	52
	NCLOC:	1,555		Classes:	1

Source file

Conditionals

Statements

Methods

TOTAL

Clean.java

62.2%

66%

84.6%

65.4%

1		/*
2		* Java HTML Tidy - JTidy
3		* HTML parser and pretty printer
4		*
5		* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6		* Institute of Technology, Institut National de Recherche en
7		* Informatique et en Automatique, Keio University). All Rights
8		* Reserved.
9		*
10		* Contributing Author(s):
11		*
12		* Dave Raggett <dsr@w3.org>
13		* Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14		* Gary L Peskin <garyp@firstech.com> (Java development)
15		* Sami Lempinen <sami@lempinen.net> (release management)
16		* Fabrizio Giustina <fgiust at users.sourceforge.net>
17		*
18		* The contributing author(s) would like to thank all those who
19		* helped with testing, bug fixes, and patience. This wouldn't
20		* have been possible without all of you.
21		*
22		* COPYRIGHT NOTICE:
23		*
24		* This software and documentation is provided "as is," and
25		* the copyright holders and contributing author(s) make no
26		* representations or warranties, express or implied, including
27		* but not limited to, warranties of merchantability or fitness
28		* for any particular purpose or that the use of the software or
29		* documentation will not infringe any third party patents,
30		* copyrights, trademarks or other rights.
31		*
32		* The copyright holders and contributing author(s) will not be
33		* liable for any direct, indirect, special or consequential damages
34		* arising out of any use of the software or documentation, even if
35		* advised of the possibility of such damage.
36		*
37		* Permission is hereby granted to use, copy, modify, and distribute
38		* this source code, or portions hereof, documentation and executables,
39		* for any purpose, without fee, subject to the following restrictions:
40		*
41		* 1. The origin of this source code must not be misrepresented.
42		* 2. Altered versions must be plainly marked as such and must
43		* not be misrepresented as being the original source.
44		* 3. This Copyright notice may not be removed or altered from any
45		* source or altered source distribution.
46		*
47		* The copyright holders and contributing author(s) specifically
48		* permit, without fee, and encourage the use of this source code
49		* as a component for supporting the Hypertext Markup Language in
50		* commercial products. If you use this source code in a product,
51		* acknowledgment is not required but would be appreciated.
52		*
53		*/
54		package org.w3c.tidy;
55
56		/**
57		* Clean up misuse of presentation markup. Filters from other formats such as Microsoft Word often make excessive use of
58		* presentation markup such as font tags, B, I, and the align attribute. By applying a set of production rules, it is
59		* straight forward to transform this to use CSS. Some rules replace some of the children of an element by style
60		* properties on the element, e.g.
61		* <p>
62		* <b>... </b>
63		* </p>.
64		* <p style="font-weight: bold">
65		* ...
66		* </p>
67		* Such rules are applied to the element's content and then to the element itself until no more rules apply.
68		* Having applied all the rules to an element, it will have a style attribute with one or more properties. Other rules
69		* strip the element they apply to, replacing it by style properties on the contents, e.g. <dir>
70		* <li>
71		* <p>
72		* ...</li>
73		* </dir>.
74		* <p style="margin-left: 1em">
75		* ... These rules are applied to an element before processing its content and replace the current element by the first
76		* element in the exposed content. After applying both sets of rules, you can replace the style attribute by a class
77		* value and style rule in the document head. To support this, an association of styles and class names is built. A
78		* naive approach is to rely on string matching to test when two property lists are the same. A better approach would be
79		* to first sort the properties before matching.
80		* @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
81		* @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
82		* @author Fabrizio Giustina
83		* @version $Revision: 802 $ ($Author: fgiust $)
84		*/
85		public class Clean
86		{
87
88		/**
89		* sequential number for generated css classes.
90		*/
91		private int classNum = 1;
92
93		/**
94		* Tag table.
95		*/
96		private TagTable tt;
97
/**
 * Creates a cleaner bound to the given tag table.
 * @param tagTable tag table kept for the tag identity checks made by the clean-up rules
 */
public Clean(TagTable tagTable)
{
    tt = tagTable;
}
106
107		/**
108		* Insert a css style property.
109		* @param props StyleProp instance
110		* @param name property name
111		* @param value property value
112		* @return StyleProp containin the given property
113		*/
114	2	private StyleProp insertProperty(StyleProp props, String name, String value)
115		{
116	2	StyleProp first, prev, prop;
117	2	int cmp;
118
119	2	prev = null;
120	2	first = props;
121
122	2	while (props != null)
123		{
124	1	cmp = props.name.compareTo(name);
125
126	1	if (cmp == 0)
127		{
128		// this property is already defined, ignore new value
129	0	return first;
130		}
131
132	1	if (cmp > 0) // props.name > name
133		{
134		// insert before this
135
136	1	prop = new StyleProp(name, value, props);
137
138	1	if (prev != null)
139		{
140	0	prev.next = prop;
141		}
142		else
143		{
144	1	first = prop;
145		}
146
147	1	return first;
148		}
149
150	0	prev = props;
151	0	props = props.next;
152		}
153
154	1	prop = new StyleProp(name, value, null);
155
156	1	if (prev != null)
157		{
158	0	prev.next = prop;
159		}
160		else
161		{
162	1	first = prop;
163		}
164
165	1	return first;
166		}
167
168		/**
169		* Create sorted linked list of properties from style string.
170		* @param prop StyleProp
171		* @param style style string
172		* @return StyleProp with given style
173		*/
174	2	private StyleProp createProps(StyleProp prop, String style)
175		{
176	2	int nameEnd;
177	2	int valueEnd;
178	2	int valueStart = 0;
179	2	int nameStart = 0;
180	2	boolean more;
181
182	2	nameStart = 0;
183	2	while (nameStart < style.length())
184		{
185	2	while (nameStart < style.length() && style.charAt(nameStart) == ' ')
186		{
187	0	++nameStart;
188		}
189
190	2	nameEnd = nameStart;
191
192	22	while (nameEnd < style.length())
193		{
194	22	if (style.charAt(nameEnd) == ':')
195		{
196	2	valueStart = nameEnd + 1;
197	2	break;
198		}
199
200	20	++nameEnd;
201		}
202
203	2	if (nameEnd >= style.length() \|\| style.charAt(nameEnd) != ':')
204		{
205	0	break;
206		}
207
208	2	while (valueStart < style.length() && style.charAt(valueStart) == ' ')
209		{
210	2	++valueStart;
211		}
212
213	2	valueEnd = valueStart;
214	2	more = false;
215
216	2	while (valueEnd < style.length())
217		{
218	26	if (style.charAt(valueEnd) == ';')
219		{
220	0	more = true;
221	0	break;
222		}
223
224	26	++valueEnd;
225		}
226
227	2	prop = insertProperty(prop, style.substring(nameStart, nameEnd), style.substring(valueStart, valueEnd));
228
229	2	if (more)
230		{
231	0	nameStart = valueEnd + 1;
232	0	continue;
233		}
234
235	2	break;
236		}
237
238	2	return prop;
239		}
240
241		/**
242		* Create a css property.
243		* @param props StyleProp
244		* @return css property as String
245		*/
246	1	private String createPropString(StyleProp props)
247		{
248	1	String style = "";
249	1	int len;
250	1	StyleProp prop;
251
252		// compute length
253	1	for (len = 0, prop = props; prop != null; prop = prop.next)
254		{
255	2	len += prop.name.length() + 2;
256	2	len += prop.value.length() + 2;
257		}
258
259	2	for (prop = props; prop != null; prop = prop.next)
260		{
261	2	style = style.concat(prop.name);
262	2	style = style.concat(": ");
263
264	2	style = style.concat(prop.value);
265
266	2	if (prop.next == null)
267		{
268	1	break;
269		}
270
271	1	style = style.concat("; ");
272		}
273
274	1	return style;
275		}
276
277		/**
278		* Creates a string with merged properties.
279		* @param style css style
280		* @param property css properties
281		* @return merged string
282		*/
283	1	private String addProperty(String style, String property)
284		{
285	1	StyleProp prop;
286
287	1	prop = createProps(null, style);
288	1	prop = createProps(prop, property);
289	1	style = createPropString(prop);
290	1	return style;
291		}
292
293		/**
294		* Generates a new css class name.
295		* @param lexer Lexer
296		* @param tag Tag
297		* @return generated css class
298		*/
299	1	private String gensymClass(Lexer lexer, String tag)
300		{
301	1	String str;
302
303	1	str = lexer.configuration.cssPrefix == null ? lexer.configuration.cssPrefix + this.classNum : "c"
304		+ this.classNum;
305	1	this.classNum++;
306	1	return str;
307		}
308
309		/**
310		* Finds a css style.
311		* @param lexer Lexer
312		* @param tag tag name
313		* @param properties css properties
314		* @return style string
315		*/
316	1	private String findStyle(Lexer lexer, String tag, String properties)
317		{
318	1	Style style;
319
320	1	for (style = lexer.styles; style != null; style = style.next)
321		{
322	0	if (style.tag.equals(tag) && style.properties.equals(properties))
323		{
324	0	return style.tagClass;
325		}
326		}
327
328	1	style = new Style(tag, gensymClass(lexer, tag), properties, lexer.styles);
329	1	lexer.styles = style;
330	1	return style.tagClass;
331		}
332
333		/**
334		* Find style attribute in node, and replace it by corresponding class attribute. Search for class in style
335		* dictionary otherwise gensym new class and add to dictionary. Assumes that node doesn't have a class attribute.
336		* @param lexer Lexer
337		* @param node node with a style attribute
338		*/
339	14	private void style2Rule(Lexer lexer, Node node)
340		{
341	14	AttVal styleattr, classattr;
342	14	String classname;
343
344	14	styleattr = node.getAttrByName("style");
345
346	14	if (styleattr != null)
347		{
348	1	classname = findStyle(lexer, node.element, styleattr.value);
349	1	classattr = node.getAttrByName("class");
350
351		// if there already is a class attribute then append class name after a space
352
353	1	if (classattr != null)
354		{
355	0	classattr.value = classattr.value + " " + classname;
356	0	node.removeAttribute(styleattr);
357		}
358		else
359		{
360		// reuse style attribute for class attribute
361	1	styleattr.attribute = "class";
362	1	styleattr.value = classname;
363		}
364		}
365		}
366
367		/**
368		* Adds a css rule for color.
369		* @param lexer Lexer
370		* @param selector css selector
371		* @param color color value
372		*/
373	0	private void addColorRule(Lexer lexer, String selector, String color)
374		{
375	0	if (color != null)
376		{
377	0	lexer.addStringLiteral(selector);
378	0	lexer.addStringLiteral(" { color: ");
379	0	lexer.addStringLiteral(color);
380	0	lexer.addStringLiteral(" }\n");
381		}
382		}
383
384		/**
385		* Move presentation attribs from body to style element.
386		*
387		* <pre>
388		* background="foo" . body { background-image: url(foo) }
389		* bgcolor="foo" . body { background-color: foo }
390		* text="foo" . body { color: foo }
391		* link="foo" . :link { color: foo }
392		* vlink="foo" . :visited { color: foo }
393		* alink="foo" . :active { color: foo }
394		* </pre>
395		*
396		* @param lexer Lexer
397		* @param body body node
398		*/
399	1	private void cleanBodyAttrs(Lexer lexer, Node body)
400		{
401	1	AttVal attr;
402	1	String bgurl = null;
403	1	String bgcolor = null;
404	1	String color = null;
405
406	1	attr = body.getAttrByName("background");
407
408	1	if (attr != null)
409		{
410	0	bgurl = attr.value;
411	0	attr.value = null;
412	0	body.removeAttribute(attr);
413		}
414
415	1	attr = body.getAttrByName("bgcolor");
416
417	1	if (attr != null)
418		{
419	0	bgcolor = attr.value;
420	0	attr.value = null;
421	0	body.removeAttribute(attr);
422		}
423
424	1	attr = body.getAttrByName("text");
425
426	1	if (attr != null)
427		{
428	1	color = attr.value;
429	1	attr.value = null;
430	1	body.removeAttribute(attr);
431		}
432
433	1	if (bgurl != null \|\| bgcolor != null \|\| color != null)
434		{
435	1	lexer.addStringLiteral(" body {\n");
436
437	1	if (bgurl != null)
438		{
439	0	lexer.addStringLiteral(" background-image: url(");
440	0	lexer.addStringLiteral(bgurl);
441	0	lexer.addStringLiteral(");\n");
442		}
443
444	1	if (bgcolor != null)
445		{
446	0	lexer.addStringLiteral(" background-color: ");
447	0	lexer.addStringLiteral(bgcolor);
448	0	lexer.addStringLiteral(";\n");
449		}
450
451	1	if (color != null)
452		{
453	1	lexer.addStringLiteral(" color: ");
454	1	lexer.addStringLiteral(color);
455	1	lexer.addStringLiteral(";\n");
456		}
457
458	1	lexer.addStringLiteral(" }\n");
459		}
460
461	1	attr = body.getAttrByName("link");
462
463	1	if (attr != null)
464		{
465	0	addColorRule(lexer, " :link", attr.value);
466	0	body.removeAttribute(attr);
467		}
468
469	1	attr = body.getAttrByName("vlink");
470
471	1	if (attr != null)
472		{
473	0	addColorRule(lexer, " :visited", attr.value);
474	0	body.removeAttribute(attr);
475		}
476
477	1	attr = body.getAttrByName("alink");
478
479	1	if (attr != null)
480		{
481	0	addColorRule(lexer, " :active", attr.value);
482	0	body.removeAttribute(attr);
483		}
484		}
485
486		/**
487		* Check deprecated attributes in body tag.
488		* @param lexer Lexer
489		* @param doc document root node
490		* @return <code>true</code> is the body doesn't contain deprecated attributes, false otherwise.
491		*/
492	0	private boolean niceBody(Lexer lexer, Node doc)
493		{
494	0	Node body = doc.findBody(lexer.configuration.tt);
495
496	0	if (body != null)
497		{
498	0	if (body.getAttrByName("background") != null
499		\|\| body.getAttrByName("bgcolor") != null
500		\|\| body.getAttrByName("text") != null
501		\|\| body.getAttrByName("link") != null
502		\|\| body.getAttrByName("vlink") != null
503		\|\| body.getAttrByName("alink") != null)
504		{
505	0	lexer.badLayout \|= Report.USING_BODY;
506	0	return false;
507		}
508		}
509
510	0	return true;
511		}
512
513		/**
514		* Create style element using rules from dictionary.
515		* @param lexer Lexer
516		* @param doc root node
517		*/
518	1	private void createStyleElement(Lexer lexer, Node doc)
519		{
520	1	Node node, head, body;
521	1	Style style;
522	1	AttVal av;
523
524	1	if (lexer.styles == null && niceBody(lexer, doc))
525		{
526	0	return;
527		}
528
529	1	node = lexer.newNode(Node.START_TAG, null, 0, 0, "style");
530	1	node.implicit = true;
531
532		// insert type attribute
533	1	av = new AttVal(null, null, '"', "type", "text/css");
534	1	av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
535	1	node.attributes = av;
536
537	1	body = doc.findBody(lexer.configuration.tt);
538
539	1	lexer.txtstart = lexer.lexsize;
540
541	1	if (body != null)
542		{
543	1	cleanBodyAttrs(lexer, body);
544		}
545
546	1	for (style = lexer.styles; style != null; style = style.next)
547		{
548	1	lexer.addCharToLexer(' ');
549	1	lexer.addStringLiteral(style.tag);
550	1	lexer.addCharToLexer('.');
551	1	lexer.addStringLiteral(style.tagClass);
552	1	lexer.addCharToLexer(' ');
553	1	lexer.addCharToLexer('{');
554	1	lexer.addStringLiteral(style.properties);
555	1	lexer.addCharToLexer('}');
556	1	lexer.addCharToLexer('\n');
557		}
558
559	1	lexer.txtend = lexer.lexsize;
560
561	1	node.insertNodeAtEnd(lexer.newNode(Node.TEXT_NODE, lexer.lexbuf, lexer.txtstart, lexer.txtend));
562
563		// now insert style element into document head doc is root node. search its children for html node the head
564		// node should be first child of html node
565
566	1	head = doc.findHEAD(lexer.configuration.tt);
567
568	1	if (head != null)
569		{
570	1	head.insertNodeAtEnd(node);
571		}
572		}
573
574		/**
575		* Ensure bidirectional links are consistent.
576		* @param node root node
577		*/
578	0	private void fixNodeLinks(Node node)
579		{
580	0	Node child;
581
582	0	if (node.prev != null)
583		{
584	0	node.prev.next = node;
585		}
586		else
587		{
588	0	node.parent.content = node;
589		}
590
591	0	if (node.next != null)
592		{
593	0	node.next.prev = node;
594		}
595		else
596		{
597	0	node.parent.last = node;
598		}
599
600	0	for (child = node.content; child != null; child = child.next)
601		{
602	0	child.parent = node;
603		}
604		}
605
606		/**
607		* Used to strip child of node when the node has one and only one child.
608		* @param node parent node
609		*/
610	6	private void stripOnlyChild(Node node)
611		{
612	6	Node child;
613
614	6	child = node.content;
615	6	node.content = child.content;
616	6	node.last = child.last;
617	6	child.content = null;
618
619	6	for (child = node.content; child != null; child = child.next)
620		{
621	7	child.parent = node;
622		}
623		}
624
625		/**
626		* Used to strip font start and end tags.
627		* @param element original node
628		* @param pnode passed in as array to allow modification. pnode[0] will contain the final node
629		* @todo remove the pnode parameter and make it a return value
630		*/
631	3	private void discardContainer(Node element, Node[] pnode)
632		{
633	3	Node node;
634	3	Node parent = element.parent;
635
636	3	if (element.content != null)
637		{
638	3	element.last.next = element.next;
639
640	3	if (element.next != null)
641		{
642	1	element.next.prev = element.last;
643	1	element.last.next = element.next;
644		}
645		else
646		{
647	2	parent.last = element.last;
648		}
649
650	3	if (element.prev != null)
651		{
652	0	element.content.prev = element.prev;
653	0	element.prev.next = element.content;
654		}
655		else
656		{
657	3	parent.content = element.content;
658		}
659
660	3	for (node = element.content; node != null; node = node.next)
661		{
662	6	node.parent = parent;
663		}
664
665	3	pnode[0] = element.content;
666		}
667		else
668		{
669	0	if (element.next != null)
670		{
671	0	element.next.prev = element.prev;
672		}
673		else
674		{
675	0	parent.last = element.prev;
676		}
677
678	0	if (element.prev != null)
679		{
680	0	element.prev.next = element.next;
681		}
682		else
683		{
684	0	parent.content = element.next;
685		}
686
687	0	pnode[0] = element.next;
688		}
689
690	3	element.next = null;
691	3	element.content = null;
692		}
693
694		/**
695		* Add style property to element, creating style attribute as needed and adding ; delimiter.
696		* @param node node
697		* @param property property added to node
698		*/
699	5	private void addStyleProperty(Node node, String property)
700		{
701	5	AttVal av;
702
703	5	for (av = node.attributes; av != null; av = av.next)
704		{
705	4	if (av.attribute.equals("style"))
706		{
707	1	break;
708		}
709		}
710
711		// if style attribute already exists then insert property
712
713	5	if (av != null)
714		{
715	1	String s;
716
717	1	s = addProperty(av.value, property);
718	1	av.value = s;
719		}
720		else
721		{
722		// else create new style attribute
723	4	av = new AttVal(node.attributes, null, '"', "style", property);
724	4	av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
725	4	node.attributes = av;
726		}
727		}
728
729		/**
730		* Create new string that consists of the combined style properties in s1 and s2. To merge property lists, we build
731		* a linked list of property/values and insert properties into the list in order, merging values for the same
732		* property name.
733		* @param s1 first property
734		* @param s2 second property
735		* @return merged properties
736		*/
737	0	private String mergeProperties(String s1, String s2)
738		{
739	0	String s;
740	0	StyleProp prop;
741
742	0	prop = createProps(null, s1);
743	0	prop = createProps(prop, s2);
744	0	s = createPropString(prop);
745	0	return s;
746		}
747
748		/**
749		* Merge class attributes from 2 nodes.
750		* @param node Node
751		* @param child Child node
752		*/
753	3	private void mergeClasses(Node node, Node child)
754		{
755	3	AttVal av;
756	3	String s1, s2, names;
757
758	3	for (s2 = null, av = child.attributes; av != null; av = av.next)
759		{
760	1	if ("class".equals(av.attribute))
761		{
762	1	s2 = av.value;
763	1	break;
764		}
765		}
766
767	3	for (s1 = null, av = node.attributes; av != null; av = av.next)
768		{
769	1	if ("class".equals(av.attribute))
770		{
771	0	s1 = av.value;
772	0	break;
773		}
774		}
775
776	3	if (s1 != null)
777		{
778	0	if (s2 != null) // merge class names from both
779		{
780	0	names = s1 + ' ' + s2;
781	0	av.value = names;
782		}
783		}
784	3	else if (s2 != null) // copy class names from child
785		{
786	1	av = new AttVal(node.attributes, null, '"', "class", s2);
787	1	av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
788	1	node.attributes = av;
789		}
790		}
791
792		/**
793		* Merge style from 2 nodes.
794		* @param node Node
795		* @param child Child node
796		*/
797	3	private void mergeStyles(Node node, Node child)
798		{
799	3	AttVal av;
800	3	String s1, s2, style;
801
802		// the child may have a class attribute used for attaching styles, if so the class name needs to be copied to
803		// node's class
804	3	mergeClasses(node, child);
805
806	3	for (s2 = null, av = child.attributes; av != null; av = av.next)
807		{
808	1	if (av.attribute.equals("style"))
809		{
810	0	s2 = av.value;
811	0	break;
812		}
813		}
814
815	3	for (s1 = null, av = node.attributes; av != null; av = av.next)
816		{
817	2	if (av.attribute.equals("style"))
818		{
819	0	s1 = av.value;
820	0	break;
821		}
822		}
823
824	3	if (s1 != null)
825		{
826	0	if (s2 != null) // merge styles from both
827		{
828	0	style = mergeProperties(s1, s2);
829	0	av.value = style;
830		}
831		}
832	3	else if (s2 != null) // copy style of child
833		{
834	0	av = new AttVal(node.attributes, null, '"', "style", s2);
835	0	av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
836	0	node.attributes = av;
837		}
838		}
839
840		/**
841		* Map a % font size to a named font size.
842		* @param size size in %
843		* @return font size name
844		*/
845	2	private String fontSize2Name(String size)
846		{
847	2	String[] sizes = {"60%", "70%", "80%", null, "120%", "150%", "200%"};
848	2	String buf;
849
850	2	if (size.length() > 0 && '0' <= size.charAt(0) && size.charAt(0) <= '6')
851		{
852	0	int n = size.charAt(0) - '0';
853	0	return sizes[n];
854		}
855
856	2	if (size.length() > 0 && size.charAt(0) == '-')
857		{
858	2	if (size.length() > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6')
859		{
860	2	int n = size.charAt(1) - '0';
861	2	double x;
862
863	2	for (x = 1.0; n > 0; --n)
864		{
865	4	x *= 0.8;
866		}
867
868	2	x *= 100.0;
869	2	buf = "" + (int) x + "%";
870
871	2	return buf;
872		}
873
874	0	return "smaller"; /* "70%"; */
875		}
876
877	0	if (size.length() > 1 && '0' <= size.charAt(1) && size.charAt(1) <= '6')
878		{
879	0	int n = size.charAt(1) - '0';
880	0	double x;
881
882	0	for (x = 1.0; n > 0; --n)
883		{
884	0	x *= 1.2;
885		}
886
887	0	x *= 100.0;
888	0	buf = "" + (int) x + "%";
889
890	0	return buf;
891		}
892
893	0	return "larger"; /* "140%" */
894		}
895
896		/**
897		* Adds a font-family style.
898		* @param node Node
899		* @param face font face
900		*/
901	1	private void addFontFace(Node node, String face)
902		{
903	1	addStyleProperty(node, "font-family: " + face);
904		}
905
906		/**
907		* Adds a font size style.
908		* @param node Node
909		* @param size font size
910		*/
911	2	private void addFontSize(Node node, String size)
912		{
913	2	if (size == null)
914		{
915	0	return;
916		}
917
918	2	if ("6".equals(size) && node.tag == this.tt.tagP)
919		{
920	0	node.element = "h1";
921	0	this.tt.findTag(node);
922	0	return;
923		}
924
925	2	if ("5".equals(size) && node.tag == this.tt.tagP)
926		{
927	0	node.element = "h2";
928	0	this.tt.findTag(node);
929	0	return;
930		}
931
932	2	if ("4".equals(size) && node.tag == this.tt.tagP)
933		{
934	0	node.element = "h3";
935	0	this.tt.findTag(node);
936	0	return;
937		}
938
939	2	String value = fontSize2Name(size);
940
941	2	if (value != null)
942		{
943	2	addStyleProperty(node, "font-size: " + value);
944		}
945		}
946
947		/**
948		* Adds a font color style.
949		* @param node Node
950		* @param color color value
951		*/
952	0	private void addFontColor(Node node, String color)
953		{
954	0	addStyleProperty(node, "color: " + color);
955		}
956
957		/**
958		* Adds an align style.
959		* @param node Node
960		* @param align align value
961		*/
962	0	private void addAlign(Node node, String align)
963		{
964		// force alignment value to lower case
965	0	addStyleProperty(node, "text-align: " + align.toLowerCase());
966		}
967
968		/**
969		* Add style properties to node corresponding to the font face, size and color attributes.
970		* @param node font tag
971		* @param av attribute list for node
972		*/
973	2	private void addFontStyles(Node node, AttVal av)
974		{
975	2	while (av != null)
976		{
977	3	if (av.attribute.equals("face"))
978		{
979	1	addFontFace(node, av.value);
980		}
981	2	else if (av.attribute.equals("size"))
982		{
983	2	addFontSize(node, av.value);
984		}
985	0	else if (av.attribute.equals("color"))
986		{
987	0	addFontColor(node, av.value);
988		}
989
990	3	av = av.next;
991		}
992		}
993
994		/**
995		* Symptom: <code><p align=center></code>. Action: <code><p style="text-align: center"></code>.
996		* @param lexer Lexer
997		* @param node node with center attribute. Will be modified to use css style.
998		*/
999	40	private void textAlign(Lexer lexer, Node node)
1000		{
1001	40	AttVal av, prev;
1002
1003	40	prev = null;
1004
1005	40	for (av = node.attributes; av != null; av = av.next)
1006		{
1007	17	if (av.attribute.equals("align"))
1008		{
1009	0	if (prev != null)
1010		{
1011	0	prev.next = av.next;
1012		}
1013		else
1014		{
1015	0	node.attributes = av.next;
1016		}
1017
1018	0	if (av.value != null)
1019		{
1020	0	addAlign(node, av.value);
1021		}
1022
1023	0	break;
1024		}
1025
1026	17	prev = av;
1027		}
1028		}
1029
1030		/**
1031		* Symptom: <code><dir><li></code> where <code><li></code> is only child. Action: coerce
1032		* <code><dir> <li></code> to <code><div></code> with indent. The clean up rules use the pnode argument
1033		* to return the next node when the original node has been deleted.
1034		* @param lexer Lexer
1035		* @param node dir tag
1036		* @return <code>true</code> if a dir tag has been coerced to a div
1037		*/
1038	673	private boolean dir2Div(Lexer lexer, Node node)
1039		{
1040	673	Node child;
1041
1042	673	if (node.tag == this.tt.tagDir \|\| node.tag == this.tt.tagUl \|\| node.tag == this.tt.tagOl)
1043		{
1044	1	child = node.content;
1045
1046	1	if (child == null)
1047		{
1048	0	return false;
1049		}
1050
1051		// check child has no peers
1052	1	if (child.next != null)
1053		{
1054	1	return false;
1055		}
1056
1057	0	if (child.tag != this.tt.tagLi)
1058		{
1059	0	return false;
1060		}
1061
1062	0	if (!child.implicit)
1063		{
1064	0	return false;
1065		}
1066
1067		// coerce dir to div
1068	0	node.tag = this.tt.tagDiv;
1069	0	node.element = "div";
1070	0	addStyleProperty(node, "margin-left: 2em");
1071	0	stripOnlyChild(node);
1072	0	return true;
1073		}
1074
1075	672	return false;
1076		}
1077
1078		/**
1079		* Symptom:
1080		*
1081		* <pre>
1082		* <center>
1083		* </pre>.
1084		* <p>
1085		* Action: replace <code><center></code> by <code><div style="text-align: center"></code>
1086		* </p>
1087		* @param lexer Lexer
1088		* @param node center tag
1089		* @param pnode pnode[0] is the same as node, passed in as an array to allow modification
1090		* @return <code>true</code> if a center tag has been replaced by a div
1091		*/
1092	673	private boolean center2Div(Lexer lexer, Node node, Node[] pnode)
1093		{
1094	673	if (node.tag == this.tt.tagCenter)
1095		{
1096	0	if (lexer.configuration.dropFontTags)
1097		{
1098	0	if (node.content != null)
1099		{
1100	0	Node last = node.last;
1101	0	Node parent = node.parent;
1102
1103	0	discardContainer(node, pnode);
1104
1105	0	node = lexer.inferredTag("br");
1106
1107	0	if (last.next != null)
1108		{
1109	0	last.next.prev = node;
1110		}
1111
1112	0	node.next = last.next;
1113	0	last.next = node;
1114	0	node.prev = last;
1115
1116	0	if (parent.last == last)
1117		{
1118	0	parent.last = node;
1119		}
1120
1121	0	node.parent = parent;
1122		}
1123		else
1124		{
1125	0	Node prev = node.prev;
1126	0	Node next = node.next;
1127	0	Node parent = node.parent;
1128	0	discardContainer(node, pnode);
1129
1130	0	node = lexer.inferredTag("br");
1131	0	node.next = next;
1132	0	node.prev = prev;
1133	0	node.parent = parent;
1134
1135	0	if (next != null)
1136		{
1137	0	next.prev = node;
1138		}
1139		else
1140		{
1141	0	parent.last = node;
1142		}
1143
1144	0	if (prev != null)
1145		{
1146	0	prev.next = node;
1147		}
1148		else
1149		{
1150	0	parent.content = node;
1151		}
1152		}
1153
1154	0	return true;
1155		}
1156	0	node.tag = this.tt.tagDiv;
1157	0	node.element = "div";
1158	0	addStyleProperty(node, "text-align: center");
1159	0	return true;
1160		}
1161
1162	673	return false;
1163		}
1164
1165		/**
1166		* Symptom: <code><div><div>...</div></div></code> Action: merge the two divs. This is useful after
1167		* nested <dir>s used by Word for indenting have been converted to <div>s.
1168		* @param lexer Lexer
1169		* @param node first div
1170		* @return true if the divs have been merged
1171		*/
1172	673	private boolean mergeDivs(Lexer lexer, Node node)
1173		{
1174	673	Node child;
1175
1176	673	if (node.tag != this.tt.tagDiv)
1177		{
1178	668	return false;
1179		}
1180
1181	5	child = node.content;
1182
1183	5	if (child == null)
1184		{
1185	0	return false;
1186		}
1187
1188	5	if (child.tag != this.tt.tagDiv)
1189		{
1190	4	return false;
1191		}
1192
1193	1	if (child.next != null)
1194		{
1195	0	return false;
1196		}
1197
1198	1	mergeStyles(node, child);
1199	1	stripOnlyChild(node);
1200	1	return true;
1201		}
1202
1203		/**
1204		* Symptom:
1205		* <ul>
1206		* <li>
1207		* <ul>
1208		* ...
1209		* </ul>
1210		* </li>
1211		* </ul>
1212		* Action: discard outer list.
1213		* @param lexer Lexer
1214		* @param node Node
1215		* @param pnode passed in as array to allow modifications.
1216		* @return <code>true</code> if nested lists have been found and replaced
1217		*/
1218	673	private boolean nestedList(Lexer lexer, Node node, Node[] pnode)
1219		{
1220	673	Node child, list;
1221
1222	673	if (node.tag == this.tt.tagUl \|\| node.tag == this.tt.tagOl)
1223		{
1224	1	child = node.content;
1225
1226	1	if (child == null)
1227		{
1228	0	return false;
1229		}
1230
1231		// check child has no peers
1232
1233	1	if (child.next != null)
1234		{
1235	1	return false;
1236		}
1237
1238	0	list = child.content;
1239
1240	0	if (list == null)
1241		{
1242	0	return false;
1243		}
1244
1245	0	if (list.tag != node.tag)
1246		{
1247	0	return false;
1248		}
1249
1250	0	pnode[0] = list; // Set node to resume iteration
1251
1252		// move inner list node into position of outer node
1253	0	list.prev = node.prev;
1254	0	list.next = node.next;
1255	0	list.parent = node.parent;
1256	0	fixNodeLinks(list);
1257
1258		// get rid of outer ul and its li
1259		// XXX: Are we leaking the child node? -creitzel 7 Jun, 01
1260	0	child.content = null;
1261	0	node.content = null;
1262	0	node.next = null;
1263	0	node = null;
1264
1265		// If prev node was a list the chances are this node should be appended to that list. Word has no way of
1266		// recognizing nested lists and just uses indents
1267	0	if (list.prev != null)
1268		{
1269	0	if (list.prev.tag == this.tt.tagUl \|\| list.prev.tag == this.tt.tagOl)
1270		{
1271
1272	0	node = list;
1273	0	list = node.prev;
1274
1275	0	list.next = node.next;
1276
1277	0	if (list.next != null)
1278		{
1279	0	list.next.prev = list;
1280		}
1281
1282	0	child = list.last; /* <li> */
1283
1284	0	node.parent = child;
1285	0	node.next = null;
1286	0	node.prev = child.last;
1287	0	fixNodeLinks(node);
1288	0	cleanNode(lexer, node);
1289		}
1290		}
1291
1292	0	return true;
1293		}
1294
1295	672	return false;
1296		}
1297
1298		/**
1299		* Symptom: the only child of a block-level element is a presentation element such as B, I or FONT. Action: add
1300		* style "font-weight: bold" to the block and strip the <b>element, leaving its children. example:
1301		*
1302		* <pre>
1303		* <p>
1304		* <b><font face="Arial" size="6">Draft Recommended Practice</font></b>
1305		* </p>
1306		* </pre>
1307		*
1308		* becomes:
1309		*
1310		* <pre>
1311		* <p style="font-weight: bold; font-family: Arial; font-size: 6">
1312		* Draft Recommended Practice
1313		* </p>
1314		* </pre>
1315		*
1316		* <p>
1317		* This code also replaces the align attribute by a style attribute. However, to avoid CSS problems with Navigator
1318		* 4, this isn't done for the elements: caption, tr and table
1319		* </p>
1320		* @param lexer Lexer
1321		* @param node parent node
1322		* @return <code>true</code> if the child node has been removed
1323		*/
1324	672	private boolean blockStyle(Lexer lexer, Node node)
1325		{
1326	672	Node child;
1327
1328	672	if ((node.tag.model & (Dict.CM_BLOCK \| Dict.CM_LIST \| Dict.CM_DEFLIST \| Dict.CM_TABLE)) != 0)
1329		{
1330	55	if (node.tag != this.tt.tagTable && node.tag != this.tt.tagTr && node.tag != this.tt.tagLi)
1331		{
1332		// check for align attribute
1333	40	if (node.tag != this.tt.tagCaption)
1334		{
1335	40	textAlign(lexer, node);
1336		}
1337
1338	40	child = node.content;
1339
1340	40	if (child == null)
1341		{
1342	6	return false;
1343		}
1344
1345		// check child has no peers
1346	34	if (child.next != null)
1347		{
1348	12	return false;
1349		}
1350
1351	22	if (child.tag == this.tt.tagB)
1352		{
1353	2	mergeStyles(node, child);
1354	2	addStyleProperty(node, "font-weight: bold");
1355	2	stripOnlyChild(node);
1356	2	return true;
1357		}
1358
1359	20	if (child.tag == this.tt.tagI)
1360		{
1361	0	mergeStyles(node, child);
1362	0	addStyleProperty(node, "font-style: italic");
1363	0	stripOnlyChild(node);
1364	0	return true;
1365		}
1366
1367	20	if (child.tag == this.tt.tagFont)
1368		{
1369	0	mergeStyles(node, child);
1370	0	addFontStyles(node, child.attributes);
1371	0	stripOnlyChild(node);
1372	0	return true;
1373		}
1374		}
1375		}
1376
1377	652	return false;
1378		}
1379
1380		/**
1381		* If the node has only one b, i, or font child remove the child node and add the appropriate style attributes to
1382		* parent.
1383		* @param lexer Lexer
1384		* @param node parent node
1385		* @param pnode passed as an array to allow modifications
1386		* @return <code>true</code> if child node has been stripped, replaced by style attributes.
1387		*/
1388	670	private boolean inlineStyle(Lexer lexer, Node node, Node[] pnode)
1389		{
1390	670	Node child;
1391
1392	670	if (node.tag != this.tt.tagFont && (node.tag.model & (Dict.CM_INLINE \| Dict.CM_ROW)) != 0)
1393		{
1394	543	child = node.content;
1395
1396	543	if (child == null)
1397		{
1398	264	return false;
1399		}
1400
1401		// check child has no peers
1402	279	if (child.next != null)
1403		{
1404	5	return false;
1405		}
1406
1407	274	if (child.tag == this.tt.tagB && lexer.configuration.logicalEmphasis)
1408		{
1409	0	mergeStyles(node, child);
1410	0	addStyleProperty(node, "font-weight: bold");
1411	0	stripOnlyChild(node);
1412	0	return true;
1413		}
1414
1415	274	if (child.tag == this.tt.tagI && lexer.configuration.logicalEmphasis)
1416		{
1417	0	mergeStyles(node, child);
1418	0	addStyleProperty(node, "font-style: italic");
1419	0	stripOnlyChild(node);
1420	0	return true;
1421		}
1422
1423	274	if (child.tag == this.tt.tagFont)
1424		{
1425	0	mergeStyles(node, child);
1426	0	addFontStyles(node, child.attributes);
1427	0	stripOnlyChild(node);
1428	0	return true;
1429		}
1430		}
1431
1432	401	return false;
1433		}
1434
1435		/**
1436		* Replace font elements by span elements, deleting the font element's attributes and replacing them by a single
1437		* style attribute.
1438		* @param lexer Lexer
1439		* @param node font tag
1440		* @param pnode passed as an array to allow modifications
1441		* @return <code>true</code> if a font tag has been dropped and replaced by style attributes
1442		*/
1443	670	private boolean font2Span(Lexer lexer, Node node, Node[] pnode)
1444		{
1445	670	AttVal av, style, next;
1446
1447	670	if (node.tag == this.tt.tagFont)
1448		{
1449	4	if (lexer.configuration.dropFontTags)
1450		{
1451	2	discardContainer(node, pnode);
1452	2	return false;
1453		}
1454
1455		// if FONT is only child of parent element then leave alone
1456	2	if (node.parent.content == node && node.next == null)
1457		{
1458	0	return false;
1459		}
1460
1461	2	addFontStyles(node, node.attributes);
1462
1463		// extract style attribute and free the rest
1464	2	av = node.attributes;
1465	2	style = null;
1466
1467	2	while (av != null)
1468		{
1469	5	next = av.next;
1470
1471	5	if (av.attribute.equals("style"))
1472		{
1473	2	av.next = null;
1474	2	style = av;
1475		}
1476
1477	5	av = next;
1478		}
1479
1480	2	node.attributes = style;
1481
1482	2	node.tag = this.tt.tagSpan;
1483	2	node.element = "span";
1484
1485	2	return true;
1486		}
1487
1488	666	return false;
1489		}
1490
1491		/**
1492		* Applies all matching rules to a node.
1493		* @param lexer Lexer
1494		* @param node original node
1495		* @return cleaned up node
1496		*/
1497	1833	private Node cleanNode(Lexer lexer, Node node)
1498		{
1499	1833	Node next = null;
1500	1833	Node[] o = new Node[1];
1501	1833	boolean b = false;
1502
1503	1833	for (next = node; node != null && node.isElement(); node = next)
1504		{
1505	673	o[0] = next;
1506
1507	673	b = dir2Div(lexer, node);
1508	673	next = o[0];
1509	673	if (b)
1510		{
1511	0	continue;
1512		}
1513
1514		// Special case: true result means that arg node and its parent no longer exist.
1515		// So we must jump back up the CreateStyleProperties() call stack until we have a valid node reference.
1516	673	b = nestedList(lexer, node, o);
1517	673	next = o[0];
1518	673	if (b)
1519		{
1520	0	return next;
1521		}
1522
1523	673	b = center2Div(lexer, node, o);
1524	673	next = o[0];
1525	673	if (b)
1526		{
1527	0	continue;
1528		}
1529
1530	673	b = mergeDivs(lexer, node);
1531	673	next = o[0];
1532	673	if (b)
1533		{
1534	1	continue;
1535		}
1536
1537	672	b = blockStyle(lexer, node);
1538	672	next = o[0];
1539	672	if (b)
1540		{
1541	2	continue;
1542		}
1543
1544	670	b = inlineStyle(lexer, node, o);
1545	670	next = o[0];
1546	670	if (b)
1547		{
1548	0	continue;
1549		}
1550
1551	670	b = font2Span(lexer, node, o);
1552	670	next = o[0];
1553	670	if (b)
1554		{
1555	2	continue;
1556		}
1557
1558	668	break;
1559		}
1560
1561	1833	return next;
1562		}
1563
1564		/**
1565		* Special case: if the current node is destroyed by CleanNode() lower in the tree, this node and its parent no
1566		* longer exist. So we must jump back up the CreateStyleProperties() call stack until we have a valid node
1567		* reference.
1568		* @param lexer Lexer
1569		* @param node Node
1570		* @param prepl passed in as array to allow modifications
1571		* @return cleaned Node
1572		*/
1573	1833	private Node createStyleProperties(Lexer lexer, Node node, Node[] prepl)
1574		{
1575	1833	Node child = node.content;
1576
1577	1833	if (child != null)
1578		{
1579	408	Node[] repl = new Node[1];
1580	408	repl[0] = node;
1581	408	while (child != null)
1582		{
1583	1816	child = createStyleProperties(lexer, child, repl);
1584	1816	if (repl[0] != node)
1585		{
1586	0	return repl[0];
1587		}
1588	1816	if (child != null)
1589		{
1590	1816	child = child.next;
1591		}
1592		}
1593		}
1594
1595	1833	return cleanNode(lexer, node);
1596		}
1597
1598		/**
1599		* Find style attribute in node content, and replace it by corresponding class attribute.
1600		* @param lexer Lexer
1601		* @param node parent node
1602		*/
1603	14	private void defineStyleRules(Lexer lexer, Node node)
1604		{
1605	14	Node child;
1606
1607	14	if (node.content != null)
1608		{
1609	8	child = node.content;
1610	8	while (child != null)
1611		{
1612	13	defineStyleRules(lexer, child);
1613	13	child = child.next;
1614		}
1615		}
1616
1617	14	style2Rule(lexer, node);
1618		}
1619
1620		/**
1621		* Clean an html tree.
1622		* @param lexer Lexer
1623		* @param doc root node
1624		*/
1625	17	public void cleanTree(Lexer lexer, Node doc)
1626		{
1627	17	Node[] repl = new Node[1];
1628	17	repl[0] = doc;
1629	17	doc = createStyleProperties(lexer, doc, repl);
1630
1631	17	if (!lexer.configuration.makeClean)
1632		{
1633	1	defineStyleRules(lexer, doc);
1634	1	createStyleElement(lexer, doc);
1635		}
1636		}
1637
1638		/**
1639		* simplifies <b><b>... </b> ... </b> etc.
1640		* @param node root Node
1641		*/
1642	5567	public void nestedEmphasis(Node node)
1643		{
1644	5567	Node[] o = new Node[1];
1645	5567	Node next;
1646
1647	5567	while (node != null)
1648		{
1649	12722	next = node.next;
1650
1651	12722	if ((node.tag == this.tt.tagB \|\| node.tag == this.tt.tagI)
1652		&& node.parent != null
1653		&& node.parent.tag == node.tag)
1654		{
1655		// strip redundant inner element
1656	1	o[0] = next;
1657	1	discardContainer(node, o);
1658	1	next = o[0];
1659	1	node = next;
1660	1	continue;
1661		}
1662
1663	12721	if (node.content != null)
1664		{
1665	5348	nestedEmphasis(node.content);
1666		}
1667
1668	12721	node = next;
1669		}
1670		}
1671
1672		/**
1673		* Replace i by em and b by strong.
1674		* @param node root Node
1675		*/
1676	136	public void emFromI(Node node)
1677		{
1678	136	while (node != null)
1679		{
1680	256	if (node.tag == this.tt.tagI)
1681		{
1682	1	node.element = this.tt.tagEm.name;
1683	1	node.tag = this.tt.tagEm;
1684		}
1685	255	else if (node.tag == this.tt.tagB)
1686		{
1687	0	node.element = this.tt.tagStrong.name;
1688	0	node.tag = this.tt.tagStrong;
1689		}
1690
1691	256	if (node.content != null)
1692		{
1693	127	emFromI(node.content);
1694		}
1695
1696	256	node = node.next;
1697		}
1698		}
1699
1700		/**
1701		* Some people use dir or ul without an li to indent the content. The pattern to look for is a list with a single
1702		* implicit li. This is recursively replaced by an implicit blockquote.
1703		* @param node root Node
1704		*/
1705	5567	public void list2BQ(Node node)
1706		{
1707	5567	while (node != null)
1708		{
1709	12721	if (node.content != null)
1710		{
1711	5348	list2BQ(node.content);
1712		}
1713
1714	12721	if (node.tag != null
1715		&& node.tag.getParser() == ParserImpl.LIST
1716		&& node.hasOneChild()
1717		&& node.content.implicit)
1718		{
1719	3	stripOnlyChild(node);
1720	3	node.element = this.tt.tagBlockquote.name;
1721	3	node.tag = this.tt.tagBlockquote;
1722	3	node.implicit = true;
1723		}
1724
1725	12721	node = node.next;
1726		}
1727		}
1728
1729		/**
1730		* Replace implicit blockquote by div with an indent taking care to reduce nested blockquotes to a single div with
1731		* the indent set to match the nesting depth.
1732		* @param node root Node
1733		*/
1734	5564	public void bQ2Div(Node node)
1735		{
1736	5564	int indent;
1737	5564	String indentBuf;
1738	5564	AttVal attval;
1739
1740	5564	while (node != null)
1741		{
1742	12718	if (node.tag == this.tt.tagBlockquote && node.implicit)
1743		{
1744	3	indent = 1;
1745
1746	3	while (node.hasOneChild() && node.content.tag == this.tt.tagBlockquote && node.implicit)
1747		{
1748	0	++indent;
1749	0	stripOnlyChild(node);
1750		}
1751
1752	3	if (node.content != null)
1753		{
1754	3	bQ2Div(node.content);
1755		}
1756
1757	3	indentBuf = "margin-left: " + (new Integer(2 * indent)).toString() + "em";
1758
1759	3	node.element = this.tt.tagDiv.name;
1760	3	node.tag = this.tt.tagDiv;
1761
1762	3	attval = node.getAttrByName("style");
1763
1764	3	if (attval != null && attval.value != null)
1765		{
1766	2	attval.value = indentBuf + "; " + attval.value;
1767		}
1768		else
1769		{
1770	1	node.addAttribute("style", indentBuf);
1771		}
1772		}
1773	12715	else if (node.content != null)
1774		{
1775	5342	bQ2Div(node.content);
1776		}
1777
1778	12718	node = node.next;
1779		}
1780		}
1781
1782		/**
1783		* Find the enclosing table cell for the given node.
1784		* @param node Node
1785		* @return enclosing cell node
1786		*/
1787	0	Node findEnclosingCell(Node node)
1788		{
1789	0	Node check;
1790
1791	0	for (check = node; check != null; check = check.parent)
1792		{
1793	0	if (check.tag == tt.tagTd)
1794		{
1795	0	return check;
1796		}
1797		}
1798	0	return null;
1799		}
1800
1801		/**
1802		* node is <code><![if ...]></code> prune up to <code><![endif]></code>.
1803		* @param lexer Lexer
1804		* @param node Node
1805		* @return cleaned up Node
1806		*/
1807	23	public Node pruneSection(Lexer lexer, Node node)
1808		{
1809	23	for (;;)
1810		{
1811
1812		// FG: commented out - don't add   to empty cells
1813
1814		// if ((Lexer.getString(node.textarray, node.start, 21)).equals("if !supportEmptyParas"))
1815		// {
1816		// Node cell = findEnclosingCell(node);
1817		// if (cell != null)
1818		// {
1819		// // Need to put   into cell so it doesn't look weird
1820		// char onesixty[] = {(char) 160, (char) 0};
1821		// Node nbsp = lexer.newLiteralTextNode(lexer, onesixty);
1822		// Node.insertNodeBeforeElement(node, nbsp);
1823		// }
1824		// }
1825
1826		// discard node and returns next
1827	62	node = Node.discardElement(node);
1828
1829	62	if (node == null)
1830		{
1831	0	return null;
1832		}
1833
1834	62	if (node.type == Node.SECTION_TAG)
1835		{
1836	23	if ((TidyUtils.getString(node.textarray, node.start, 2)).equals("if"))
1837		{
1838	0	node = pruneSection(lexer, node);
1839	0	continue;
1840		}
1841
1842	23	if ((TidyUtils.getString(node.textarray, node.start, 5)).equals("endif"))
1843		{
1844	23	node = Node.discardElement(node);
1845	23	break;
1846		}
1847		}
1848		}
1849
1850	23	return node;
1851		}
1852
1853		/**
1854		* Drop if/endif sections inserted by word2000.
1855		* @param lexer Lexer
1856		* @param node Node root node
1857		*/
1858	130	public void dropSections(Lexer lexer, Node node)
1859		{
1860	130	while (node != null)
1861		{
1862	253	if (node.type == Node.SECTION_TAG)
1863		{
1864		// prune up to matching endif
1865	25	if ((TidyUtils.getString(node.textarray, node.start, 2)).equals("if")
1866		&& (!(TidyUtils.getString(node.textarray, node.start, 7)).equals("if !vml"))) // #444394 - fix 13
1867		// Sep 01
1868		{
1869	23	node = pruneSection(lexer, node);
1870	23	continue;
1871		}
1872
1873		// discard others as well
1874	2	node = Node.discardElement(node);
1875	2	continue;
1876		}
1877
1878	228	if (node.content != null)
1879		{
1880	124	dropSections(lexer, node.content);
1881		}
1882
1883	228	node = node.next;
1884		}
1885		}
1886
1887		/**
1888		* Remove word2000 attributes from node.
1889		* @param node node to cleanup
1890		*/
1891	156	public void purgeWord2000Attributes(Node node)
1892		{
1893	156	AttVal attr = null;
1894	156	AttVal next = null;
1895	156	AttVal prev = null;
1896
1897	156	for (attr = node.attributes; attr != null; attr = next)
1898		{
1899	96	next = attr.next;
1900
1901		// special check for class="Code" denoting pre text
1902		// Pass thru user defined styles as HTML class names
1903	96	if (attr.attribute != null && attr.value != null && attr.attribute.equals("class"))
1904		{
1905	45	if (attr.value.equals("Code") \|\| !attr.value.startsWith("Mso"))
1906		{
1907	3	prev = attr;
1908	3	continue;
1909		}
1910		}
1911
1912	93	if (attr.attribute != null
1913		&& (attr.attribute.equals("class")
1914		\|\| attr.attribute.equals("style")
1915		\|\| attr.attribute.equals("lang")
1916		\|\| attr.attribute.startsWith("x:") \|\| ((attr.attribute.equals("height") \|\| attr.attribute
1917		.equals("width")) && //
1918		(node.tag == this.tt.tagTd \|\| node.tag == this.tt.tagTr \|\| node.tag == this.tt.tagTh))))
1919		{
1920	79	if (prev != null)
1921		{
1922	4	prev.next = next;
1923		}
1924		else
1925		{
1926	75	node.attributes = next;
1927		}
1928
1929		}
1930		else
1931		{
1932	14	prev = attr;
1933		}
1934		}
1935		}
1936
1937		/**
1938		* Word2000 uses span excessively, so we strip span out.
1939		* @param lexer Lexer
1940		* @param span Node span
1941		* @return cleaned node
1942		*/
1943	32	public Node stripSpan(Lexer lexer, Node span)
1944		{
1945	32	Node node;
1946	32	Node prev = null;
1947	32	Node content;
1948
1949		// deal with span elements that have content by splicing the content in place of the span after having
1950		// processed it
1951
1952	32	cleanWord2000(lexer, span.content);
1953	32	content = span.content;
1954
1955	32	if (span.prev != null)
1956		{
1957	14	prev = span.prev;
1958		}
1959	18	else if (content != null)
1960		{
1961	12	node = content;
1962	12	content = content.next;
1963	12	node.removeNode();
1964	12	Node.insertNodeBeforeElement(span, node);
1965	12	prev = node;
1966		}
1967
1968	32	while (content != null)
1969		{
1970	28	node = content;
1971	28	content = content.next;
1972	28	node.removeNode();
1973	28	prev.insertNodeAfterElement(node);
1974	28	prev = node;
1975		}
1976
1977	32	if (span.next == null)
1978		{
1979	19	span.parent.last = prev;
1980		}
1981
1982	32	node = span.next;
1983	32	span.content = null;
1984	32	Node.discardElement(span);
1985	32	return node;
1986		}
1987
1988		/**
1989		* Map non-breaking spaces to regular spaces.
1990		* @param lexer Lexer
1991		* @param node Node
1992		*/
1993	0	private void normalizeSpaces(Lexer lexer, Node node)
1994		{
1995	0	while (node != null)
1996		{
1997	0	if (node.content != null)
1998		{
1999	0	normalizeSpaces(lexer, node.content);
2000		}
2001
2002	0	if (node.type == Node.TEXT_NODE)
2003		{
2004	0	int i;
2005	0	int[] c = new int[1];
2006	0	int p = node.start;
2007
2008	0	for (i = node.start; i < node.end; ++i)
2009		{
2010	0	c[0] = node.textarray[i];
2011
2012		// look for UTF-8 multibyte character
2013	0	if (c[0] > 0x7F)
2014		{
2015	0	i += PPrint.getUTF8(node.textarray, i, c);
2016		}
2017
2018	0	if (c[0] == 160)
2019		{
2020	0	c[0] = ' ';
2021		}
2022
2023	0	p = PPrint.putUTF8(node.textarray, p, c[0]);
2024		}
2025		}
2026
2027	0	node = node.next;
2028		}
2029		}
2030
2031		/**
2032		* Used to hunt for hidden preformatted sections.
2033		* @param node checked node
2034		* @return <code>true</code> if the node has a "margin-top: 0" or "margin-bottom: 0" style
2035		*/
2036	37	boolean noMargins(Node node)
2037		{
2038	37	AttVal attval = node.getAttrByName("style");
2039
2040	37	if (attval == null \|\| attval.value == null)
2041		{
2042	24	return false;
2043		}
2044
2045		// search for substring "margin-top: 0"
2046	13	if (attval.value.indexOf("margin-top: 0") == -1)
2047		{
2048	13	return false;
2049		}
2050
2051		// search for substring "margin-top: 0"
2052	0	if (attval.value.indexOf("margin-bottom: 0") == -1)
2053		{
2054	0	return false;
2055		}
2056
2057	0	return true;
2058		}
2059
2060		/**
2061		* Does element have a single space as its content?
2062		* @param lexer Lexer
2063		* @param node checked node
2064		* @return <code>true</code> if the element has a single space as its content
2065		*/
2066	46	boolean singleSpace(Lexer lexer, Node node)
2067		{
2068	46	if (node.content != null)
2069		{
2070	45	node = node.content;
2071
2072	45	if (node.next != null)
2073		{
2074	8	return false;
2075		}
2076
2077	37	if (node.type != Node.TEXT_NODE)
2078		{
2079	14	return false;
2080		}
2081
2082	23	if (((node.end - node.start) == 1) && lexer.lexbuf[node.start] == ' ')
2083		{
2084	0	return true;
2085		}
2086
2087	23	if ((node.end - node.start) == 2)
2088		{
2089	8	int[] c = new int[1];
2090
2091	8	PPrint.getUTF8(lexer.lexbuf, node.start, c);
2092
2093	8	if (c[0] == 160)
2094		{
2095	5	return true;
2096		}
2097		}
2098		}
2099
2100	19	return false;
2101		}
2102
2103		/**
2104		* This is a major clean up to strip out all the extra stuff you get when you save as web page from Word 2000. It
2105		* doesn't yet know what to do with VML tags, but these will appear as errors unless you declare them as new tags,
2106		* such as o:p which needs to be declared as inline.
2107		* @param lexer Lexer
2108		* @param node node to clean up
2109		*/
2110	198	public void cleanWord2000(Lexer lexer, Node node)
2111		{
2112		// used to a list from a sequence of bulletted p's
2113	198	Node list = null;
2114
2115	198	while (node != null)
2116		{
2117
2118		// get rid of Word's xmlns attributes
2119	340	if (node.tag == tt.tagHtml)
2120		{
2121		// check that it's a Word 2000 document
2122	6	if ((node.getAttrByName("xmlns:o") == null))
2123		{
2124	0	return;
2125		}
2126	6	lexer.configuration.tt.freeAttrs(node);
2127		}
2128
2129		// fix up preformatted sections by looking for a sequence of paragraphs with zero top/bottom margin
2130	340	if (node.tag == tt.tagP)
2131		{
2132	37	if (noMargins(node))
2133		{
2134	0	Node pre;
2135	0	Node next;
2136	0	Node.coerceNode(lexer, node, tt.tagPre);
2137
2138	0	purgeWord2000Attributes(node);
2139
2140	0	if (node.content != null)
2141		{
2142	0	cleanWord2000(lexer, node.content);
2143		}
2144
2145	0	pre = node;
2146	0	node = node.next;
2147
2148		// continue to strip p's
2149	0	while (node.tag == tt.tagP && noMargins(node))
2150		{
2151	0	next = node.next;
2152	0	node.removeNode();
2153	0	pre.insertNodeAtEnd(lexer.newLineNode());
2154	0	pre.insertNodeAtEnd(node);
2155	0	stripSpan(lexer, node);
2156	0	node = next;
2157		}
2158
2159	0	if (node == null)
2160		{
2161	0	break;
2162		}
2163		}
2164		}
2165
2166	340	if (node.tag != null && TidyUtils.toBoolean(node.tag.model & Dict.CM_BLOCK) && singleSpace(lexer, node))
2167		{
2168	5	node = stripSpan(lexer, node);
2169	5	continue;
2170		}
2171
2172		// discard Word's style verbiage
2173	335	if (node.tag == this.tt.tagStyle \|\| node.tag == this.tt.tagMeta \|\| node.type == Node.COMMENT_TAG)
2174		{
2175	29	node = Node.discardElement(node);
2176	29	continue;
2177		}
2178
2179		// strip out all span and font tags Word scatters so liberally!
2180	306	if (node.tag == this.tt.tagSpan \|\| node.tag == this.tt.tagFont)
2181		{
2182	27	node = stripSpan(lexer, node);
2183	27	continue;
2184		}
2185
2186	279	if (node.tag == this.tt.tagLink)
2187		{
2188	5	AttVal attr = node.getAttrByName("rel");
2189
2190	5	if (attr != null && attr.value != null && attr.value.equals("File-List"))
2191		{
2192	4	node = Node.discardElement(node);
2193	4	continue;
2194		}
2195		}
2196
2197		// discard empty paragraphs
2198	275	if (node.content == null && node.tag == this.tt.tagP)
2199		{
2200	1	node = Node.discardElement(node);
2201	1	continue;
2202		}
2203
2204	274	if (node.tag == this.tt.tagP)
2205		{
2206	31	AttVal attr = node.getAttrByName("class");
2207	31	AttVal atrStyle = node.getAttrByName("style");
2208
2209		// (JES) Sometimes Word marks a list item with the following hokie syntax
2210		// <p class="MsoNormal" style="...;mso-list:l1 level1 lfo1;
2211		// translate these into <li>
2212
2213		// map sequence of <p class="MsoListBullet"> to <ul> ... </ul>
2214		// map <p class="MsoListNumber"> to <ol>...</ol>
2215	31	if (attr != null
2216		&& attr.value != null
2217		&& ((attr.value.equals("MsoListBullet") \|\| attr.value.equals("MsoListNumber")) //
2218		\|\| (atrStyle != null && (atrStyle.value.indexOf("mso-list:") != -1)))) // 463066 - fix by Joel
2219		// Shafer 19 Sep 01
2220		{
2221	15	Dict listType = tt.tagUl;
2222
2223	15	if (attr.value.equals("MsoListNumber"))
2224		{
2225	0	listType = tt.tagOl;
2226		}
2227
2228	15	Node.coerceNode(lexer, node, this.tt.tagLi);
2229
2230	15	if (list == null \|\| list.tag != listType)
2231		{
2232	3	list = lexer.inferredTag(listType.name);
2233	3	Node.insertNodeBeforeElement(node, list);
2234		}
2235
2236	15	purgeWord2000Attributes(node);
2237
2238	15	if (node.content != null)
2239		{
2240	15	cleanWord2000(lexer, node.content);
2241		}
2242
2243		// remove node and append to contents of list
2244	15	node.removeNode();
2245	15	list.insertNodeAtEnd(node);
2246	15	node = list;
2247		}
2248		// map sequence of <p class="Code"> to <pre> ... </pre>
2249	16	else if (attr != null && attr.value != null && attr.value.equals("Code"))
2250		{
2251	0	Node br = lexer.newLineNode();
2252	0	normalizeSpaces(lexer, node);
2253
2254	0	if (list == null \|\| list.tag != this.tt.tagPre)
2255		{
2256	0	list = lexer.inferredTag("pre");
2257	0	Node.insertNodeBeforeElement(node, list);
2258		}
2259
2260		// remove node and append to contents of list
2261	0	node.removeNode();
2262	0	list.insertNodeAtEnd(node);
2263	0	stripSpan(lexer, node);
2264	0	list.insertNodeAtEnd(br);
2265	0	node = list.next;
2266		}
2267		else
2268		{
2269	16	list = null;
2270		}
2271		}
2272		else
2273		{
2274	243	list = null;
2275		}
2276
2277		// strip out style and class attributes
2278	274	if (node.type == Node.START_TAG \|\| node.type == Node.START_END_TAG)
2279		{
2280	141	purgeWord2000Attributes(node);
2281		}
2282
2283	274	if (node.content != null)
2284		{
2285	145	cleanWord2000(lexer, node.content);
2286		}
2287
2288	274	node = node.next;
2289		}
2290		}
2291
2292		/**
2293		* Check if the current document is a converted Word document.
2294		* @param root root Node
2295		* @return <code>true</code> if the document has been geenrated by Microsoft Word.
2296		*/
2297	8	public boolean isWord2000(Node root)
2298		{
2299	8	AttVal attval;
2300	8	Node node;
2301	8	Node head;
2302	8	Node html = root.findHTML(this.tt);
2303
2304	8	if (html != null && html.getAttrByName("xmlns:o") != null)
2305		{
2306	6	return true;
2307		}
2308
2309		// search for <meta name="GENERATOR" content="Microsoft ...">
2310	2	head = root.findHEAD(tt);
2311
2312	2	if (head != null)
2313		{
2314	2	for (node = head.content; node != null; node = node.next)
2315		{
2316	2	if (node.tag != tt.tagMeta)
2317		{
2318	2	continue;
2319		}
2320
2321	0	attval = node.getAttrByName("name");
2322
2323	0	if (attval == null \|\| attval.value == null)
2324		{
2325	0	continue;
2326		}
2327
2328	0	if (!"generator".equals(attval.value))
2329		{
2330	0	continue;
2331		}
2332
2333	0	attval = node.getAttrByName("content");
2334
2335	0	if (attval == null \|\| attval.value == null)
2336		{
2337	0	continue;
2338		}
2339
2340	0	if (attval.value.indexOf("Microsoft") != -1)
2341		{
2342	0	return true;
2343		}
2344		}
2345		}
2346
2347	2	return false;
2348		}
2349
2350		/**
2351		* Where appropriate move object elements from head to body.
2352		* @param lexer Lexer
2353		* @param html html node
2354		*/
2355	217	static void bumpObject(Lexer lexer, Node html)
2356		{
2357	217	if (html == null)
2358		{
2359	0	return;
2360		}
2361
2362	217	Node node, next, head = null, body = null;
2363	217	TagTable tt = lexer.configuration.tt;
2364	217	for (node = html.content; node != null; node = node.next)
2365		{
2366	437	if (node.tag == tt.tagHead)
2367		{
2368	213	head = node;
2369		}
2370
2371	437	if (node.tag == tt.tagBody)
2372		{
2373	218	body = node;
2374		}
2375		}
2376
2377	217	if (head != null && body != null)
2378		{
2379	213	for (node = head.content; node != null; node = next)
2380		{
2381	312	next = node.next;
2382
2383	312	if (node.tag == tt.tagObject)
2384		{
2385	1	Node child;
2386	1	boolean bump = false;
2387
2388	1	for (child = node.content; child != null; child = child.next)
2389		{
2390		// bump to body unless content is param
2391	1	if ((child.type == Node.TEXT_NODE && !node.isBlank(lexer)) \|\| child.tag != tt.tagParam)
2392		{
2393	1	bump = true;
2394	1	break;
2395		}
2396		}
2397
2398	1	if (bump)
2399		{
2400	1	node.removeNode();
2401	1	body.insertNodeAtStart(node);
2402		}
2403		}
2404		}
2405		}
2406		}
2407
2408		}