View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  /**
 * Used for elements and text nodes. The element name is null for text nodes. Start and end are offsets into lexbuf,
 * which contains the textual content of all elements in the parse tree. Parent and content allow traversal of the
 * parse tree in any direction. Attributes are represented as a linked list of AttVal nodes which hold the strings
 * for attribute/value pairs.
61   * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
62   * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
63   * @author Fabrizio Giustina
64   * @version $Revision: 779 $ ($Author: fgiust $)
65   */
66  public class Node implements Cloneable
67  {
68  
    /**
     * node type: root.
     */
    public static final short ROOT_NODE = 0;

    /**
     * node type: doctype.
     */
    public static final short DOCTYPE_TAG = 1;

    /**
     * node type: comment.
     */
    public static final short COMMENT_TAG = 2;

    /**
     * node type: processing instruction.
     */
    public static final short PROC_INS_TAG = 3;

    /**
     * node type: text.
     */
    public static final short TEXT_NODE = 4;

    /**
     * node type: start tag.
     */
    public static final short START_TAG = 5;

    /**
     * node type: end tag.
     */
    public static final short END_TAG = 6;

    /**
     * node type: combined start/end tag, i.e. a tag written with a slash right before the closing bracket.
     */
    public static final short START_END_TAG = 7;

    /**
     * node type: CDATA.
     */
    public static final short CDATA_TAG = 8;

    /**
     * node type: section tag.
     */
    public static final short SECTION_TAG = 9;

    /**
     * node type: asp tag.
     */
    public static final short ASP_TAG = 10;

    /**
     * node type: jste tag.
     */
    public static final short JSTE_TAG = 11;

    /**
     * node type: php tag.
     */
    public static final short PHP_TAG = 12;

    /**
     * node type: xml declaration.
     */
    public static final short XML_DECL = 13;
138 
139     /**
140      * Description for all the node types. Used in toString.
141      */
142     private static final String[] NODETYPE_STRING = {
143         "RootNode",
144         "DocTypeTag",
145         "CommentTag",
146         "ProcInsTag",
147         "TextNode",
148         "StartTag",
149         "EndTag",
150         "StartEndTag",
151         "SectionTag",
152         "AspTag",
153         "PhpTag",
154         "XmlDecl"};
155 
    /**
     * parent node.
     */
    protected Node parent;

    /**
     * previous sibling node.
     */
    protected Node prev;

    /**
     * next sibling node.
     */
    protected Node next;

    /**
     * last child node (the node most recently appended by insertNodeAtEnd).
     */
    protected Node last;

    /**
     * start of span onto text array.
     */
    protected int start;

    /**
     * end of span onto text array.
     */
    protected int end;

    /**
     * the text array.
     */
    protected byte[] textarray;

    /**
     * node type, one of the node type constants of this class: TextNode, StartTag, EndTag etc.
     */
    protected short type;

    /**
     * true if closed by explicit end tag.
     */
    protected boolean closed;

    /**
     * true if inferred.
     */
    protected boolean implicit;

    /**
     * true if followed by a line break.
     */
    protected boolean linebreak;

    /**
     * old tag when it was changed.
     */
    protected Dict was;

    /**
     * tag's dictionary definition.
     */
    protected Dict tag;

    /**
     * Tag name.
     */
    protected String element;

    /**
     * Attribute/Value linked list (head of the list, following attributes are reached through AttVal.next).
     */
    protected AttVal attributes;

    /**
     * First child node; the other children are reached through its <code>next</code> links.
     */
    protected Node content;

    /**
     * DOM adapter.
     */
    protected org.w3c.dom.Node adapter;
240 
    /**
     * Instantiates a new text node: the type is <code>TEXT_NODE</code>, there is no text array and both start and
     * end are 0.
     */
    public Node()
    {
        this(TEXT_NODE, null, 0, 0);
    }
248 
249     /**
250      * Instantiates a new node.
251      * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE |
252      * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node. ASP_TAG |
253      * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
254      * @param textarray array of bytes contained in the Node
255      * @param start start position
256      * @param end end position
257      */
258     public Node(short type, byte[] textarray, int start, int end)
259     {
260         this.parent = null;
261         this.prev = null;
262         this.next = null;
263         this.last = null;
264         this.start = start;
265         this.end = end;
266         this.textarray = textarray;
267         this.type = type;
268         this.closed = false;
269         this.implicit = false;
270         this.linebreak = false;
271         this.was = null;
272         this.tag = null;
273         this.element = null;
274         this.attributes = null;
275         this.content = null;
276     }
277 
278     /**
279      * Instantiates a new node.
280      * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE |
281      * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node. ASP_TAG |
282      * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
283      * @param textarray array of bytes contained in the Node
284      * @param start start position
285      * @param end end position
286      * @param element tag name
287      * @param tt tag table instance
288      */
289     public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
290     {
291         this.parent = null;
292         this.prev = null;
293         this.next = null;
294         this.last = null;
295         this.start = start;
296         this.end = end;
297         this.textarray = textarray;
298         this.type = type;
299         this.closed = false;
300         this.implicit = false;
301         this.linebreak = false;
302         this.was = null;
303         this.tag = null;
304         this.element = element;
305         this.attributes = null;
306         this.content = null;
307         if (type == START_TAG || type == START_END_TAG || type == END_TAG)
308         {
309             tt.findTag(this);
310         }
311     }
312 
313     /**
314      * Used to clone heading nodes when split by an hr.
315      * @see java.lang.Object#clone()
316      */
317     protected Object clone()
318     {
319         Node node;
320         try
321         {
322             node = (Node) super.clone();
323         }
324         catch (CloneNotSupportedException e)
325         {
326             // should never happen
327             throw new RuntimeException("CloneNotSupportedException " + e.getMessage());
328         }
329         if (this.textarray != null)
330         {
331             node.textarray = new byte[this.end - this.start];
332             node.start = 0;
333             node.end = this.end - this.start;
334             if (node.end > 0)
335             {
336                 System.arraycopy(this.textarray, this.start, node.textarray, node.start, node.end);
337             }
338         }
339         if (this.attributes != null)
340         {
341             node.attributes = (AttVal) this.attributes.clone();
342         }
343         return node;
344     }
345 
346     /**
347      * Returns an attribute with the given name in the current node.
348      * @param name attribute name.
349      * @return AttVal instance or null if no attribute with the iven name is found
350      */
351     public AttVal getAttrByName(String name)
352     {
353         AttVal attr;
354 
355         for (attr = this.attributes; attr != null; attr = attr.next)
356         {
357             if (name != null && attr.attribute != null && attr.attribute.equals(name))
358             {
359                 break;
360             }
361         }
362 
363         return attr;
364     }
365 
366     /**
367      * Default method for checking an element's attributes.
368      * @param lexer Lexer
369      */
370     public void checkAttributes(Lexer lexer)
371     {
372         AttVal attval;
373 
374         for (attval = this.attributes; attval != null; attval = attval.next)
375         {
376             attval.checkAttribute(lexer, this);
377         }
378     }
379 
    /**
     * The same attribute name can't be used more than once in each element. Discard or join attributes according to
     * configuration.
     * <p>
     * Every attribute which is not an asp/php block (<code>attval</code>) is compared, ignoring case, with the
     * attributes that follow it (<code>current</code>). For each duplicate found:
     * </p>
     * <ul>
     * <li>"class" attributes are joined when <code>joinClasses</code> is set: the later attribute gets both values
     * and the earlier one is removed;</li>
     * <li>"style" attributes are joined in the same way when <code>joinStyles</code> is set;</li>
     * <li>when <code>duplicateAttrs</code> is <code>Configuration.KEEP_LAST</code> the attribute referenced by
     * <code>current</code> is removed;</li>
     * <li>otherwise the attribute referenced by <code>attval</code> is removed.</li>
     * </ul>
     * <p>
     * Every join or removal is reported through <code>lexer.report.attrError</code>.
     * </p>
     * @param lexer Lexer
     */
    public void repairDuplicateAttributes(Lexer lexer)
    {
        AttVal attval;

        // outer loop: attval is the attribute other attributes are compared against; it is advanced by hand
        for (attval = this.attributes; attval != null;)
        {
            if (attval.asp == null && attval.php == null)
            {
                AttVal current;

                // inner loop: current scans the attributes following attval
                for (current = attval.next; current != null;)
                {
                    if (current.asp == null
                        && current.php == null
                        && attval.attribute != null
                        && attval.attribute.equalsIgnoreCase(current.attribute))
                    {
                        AttVal temp;

                        if ("class".equalsIgnoreCase(current.attribute) && lexer.configuration.joinClasses)
                        {
                            // concatenate classes
                            // NOTE(review): a null value on either side ends up as the text "null" in the result
                            current.value = current.value + " " + attval.value;

                            // attval is about to be removed: remember its successor, it becomes the new attval
                            temp = attval.next;

                            // NOTE(review): the scan is stopped when the new attval has no successor, otherwise it
                            // goes on from the attribute after the one just merged - confirm this is intended
                            if (temp.next == null)
                            {
                                current = null;
                            }
                            else
                            {
                                current = current.next;
                            }

                            lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE);

                            removeAttribute(attval);
                            attval = temp;
                        }
                        else if ("style".equalsIgnoreCase(current.attribute) && lexer.configuration.joinStyles)
                        {
                            // concatenate styles

                            // this doesn't handle CSS comments and leading/trailing white-space very well see
                            // http://www.w3.org/TR/css-style-attr

                            // NOTE(review): throws NullPointerException when current.value is null and
                            // StringIndexOutOfBoundsException (index -1) when it is empty - TODO confirm whether
                            // such values can reach this point
                            int end = current.value.length() - 1;

                            if (current.value.charAt(end) == ';')
                            {
                                // attribute ends with declaration seperator
                                current.value = current.value + " " + attval.value;
                            }
                            else if (current.value.charAt(end) == '}')
                            {
                                // attribute ends with rule set
                                current.value = current.value + " { " + attval.value + " }";
                            }
                            else
                            {
                                // attribute ends with property value
                                current.value = current.value + "; " + attval.value;
                            }

                            // same bookkeeping as for joined classes: attval is removed and replaced by its successor
                            temp = attval.next;

                            if (temp.next == null)
                            {
                                current = null;
                            }
                            else
                            {
                                current = current.next;
                            }

                            lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE);

                            removeAttribute(attval);
                            attval = temp;

                        }
                        else if (lexer.configuration.duplicateAttrs == Configuration.KEEP_LAST)
                        {
                            // NOTE(review): this branch removes current, i.e. the attribute found later in the list;
                            // whether that matches the option name depends on the order of the attribute list -
                            // confirm against the code building the list
                            temp = current.next;

                            lexer.report.attrError(lexer, this, current, Report.REPEATED_ATTRIBUTE);

                            removeAttribute(current);
                            current = temp;
                        }
                        else
                        {
                            // remove attval (the attribute found first in the list) and go on with its successor
                            temp = attval.next;

                            // NOTE(review): current was reached starting from attval.next, so attval.next does not
                            // look like it can be null here - confirm
                            if (attval.next == null)
                            {
                                current = null;
                            }
                            else
                            {
                                current = current.next;
                            }

                            lexer.report.attrError(lexer, this, attval, Report.REPEATED_ATTRIBUTE);

                            removeAttribute(attval);
                            attval = temp;
                        }
                    }
                    else
                    {
                        // not a duplicate, look at the next attribute
                        current = current.next;
                    }
                }
                attval = attval.next;
            }
            else
            {
                // asp and php blocks are never treated as duplicates
                attval = attval.next;
            }
        }
    }
508 
509     /**
510      * Adds an attribute to the node.
511      * @param name attribute name
512      * @param value attribute value
513      */
514     public void addAttribute(String name, String value)
515     {
516         AttVal av = new AttVal(null, null, null, null, '"', name, value);
517         av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
518 
519         if (this.attributes == null)
520         {
521             this.attributes = av;
522         }
523         else
524         {
525             // append to end of attributes
526             AttVal here = this.attributes;
527 
528             while (here.next != null)
529             {
530                 here = here.next;
531             }
532 
533             here.next = av;
534         }
535     }
536 
537     /**
538      * Remove an attribute from node and then free it.
539      * @param attr attribute to remove
540      */
541     public void removeAttribute(AttVal attr)
542     {
543         AttVal av;
544         AttVal prev = null;
545         AttVal next;
546 
547         for (av = this.attributes; av != null; av = next)
548         {
549             next = av.next;
550 
551             if (av == attr)
552             {
553                 if (prev != null)
554                 {
555                     prev.next = next;
556                 }
557                 else
558                 {
559                     this.attributes = next;
560                 }
561             }
562             else
563             {
564                 prev = av;
565             }
566         }
567     }
568 
569     /**
570      * Find the doctype element.
571      * @return doctype node or null if not found
572      */
573     public Node findDocType()
574     {
575         Node node = this.content;
576 
577         while (node != null && node.type != DOCTYPE_TAG)
578         {
579             node = node.next;
580         }
581 
582         return node;
583     }
584 
585     /**
586      * Discard the doctype node.
587      */
588     public void discardDocType()
589     {
590         Node node;
591 
592         node = findDocType();
593         if (node != null)
594         {
595             if (node.prev != null)
596             {
597                 node.prev.next = node.next;
598             }
599             else
600             {
601                 node.parent.content = node.next;
602             }
603 
604             if (node.next != null)
605             {
606                 node.next.prev = node.prev;
607             }
608 
609             node.next = null;
610         }
611     }
612 
613     /**
614      * Remove node from markup tree and discard it.
615      * @param element discarded node
616      * @return next node
617      */
618     public static Node discardElement(Node element)
619     {
620         Node next = null;
621 
622         if (element != null)
623         {
624             next = element.next;
625             element.removeNode();
626         }
627 
628         return next;
629     }
630 
631     /**
632      * Insert a node into markup tree.
633      * @param node to insert
634      */
635     public void insertNodeAtStart(Node node)
636     {
637         node.parent = this;
638 
639         if (this.content == null)
640         {
641             this.last = node;
642         }
643         else
644         {
645             this.content.prev = node; // AQ added 13 Apr 2000
646         }
647 
648         node.next = this.content;
649         node.prev = null;
650         this.content = node;
651     }
652 
653     /**
654      * Insert node into markup tree.
655      * @param node Node to insert
656      */
657     public void insertNodeAtEnd(Node node)
658     {
659         node.parent = this;
660         node.prev = this.last;
661 
662         if (this.last != null)
663         {
664             this.last.next = node;
665         }
666         else
667         {
668             this.content = node;
669         }
670 
671         this.last = node;
672     }
673 
674     /**
675      * Insert node into markup tree in pace of element which is moved to become the child of the node.
676      * @param element child node. Will be inserted as a child of element
677      * @param node parent node
678      */
679     public static void insertNodeAsParent(Node element, Node node)
680     {
681         node.content = element;
682         node.last = element;
683         node.parent = element.parent;
684         element.parent = node;
685 
686         if (node.parent.content == element)
687         {
688             node.parent.content = node;
689         }
690 
691         if (node.parent.last == element)
692         {
693             node.parent.last = node;
694         }
695 
696         node.prev = element.prev;
697         element.prev = null;
698 
699         if (node.prev != null)
700         {
701             node.prev.next = node;
702         }
703 
704         node.next = element.next;
705         element.next = null;
706 
707         if (node.next != null)
708         {
709             node.next.prev = node;
710         }
711     }
712 
713     /**
714      * Insert node into markup tree before element.
715      * @param element child node. Will be insertedbefore element
716      * @param node following node
717      */
718     public static void insertNodeBeforeElement(Node element, Node node)
719     {
720         Node parent;
721 
722         parent = element.parent;
723         node.parent = parent;
724         node.next = element;
725         node.prev = element.prev;
726         element.prev = node;
727 
728         if (node.prev != null)
729         {
730             node.prev.next = node;
731         }
732 
733         if (parent != null && parent.content == element)
734         {
735             parent.content = node;
736         }
737     }
738 
739     /**
740      * Insert node into markup tree after element.
741      * @param node new node to insert
742      */
743     public void insertNodeAfterElement(Node node)
744     {
745         Node parent;
746 
747         parent = this.parent;
748         node.parent = parent;
749 
750         // AQ - 13Jan2000 fix for parent == null
751         if (parent != null && parent.last == this)
752         {
753             parent.last = node;
754         }
755         else
756         {
757             node.next = this.next;
758             // AQ - 13Jan2000 fix for node.next == null
759             if (node.next != null)
760             {
761                 node.next.prev = node;
762             }
763         }
764 
765         this.next = node;
766         node.prev = this;
767     }
768 
769     /**
770      * Trim an empty element.
771      * @param lexer Lexer
772      * @param element empty node to be removed
773      */
774     public static void trimEmptyElement(Lexer lexer, Node element)
775     {
776         // don't trim if user explicitely set trim-empty-elements to false
777         // empty element can be needed in css sites
778         if (lexer.configuration.trimEmpty)
779         {
780             TagTable tt = lexer.configuration.tt;
781 
782             if (lexer.canPrune(element))
783             {
784                 if (element.type != TEXT_NODE)
785                 {
786                     lexer.report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
787                 }
788 
789                 discardElement(element);
790             }
791             else if (element.tag == tt.tagP && element.content == null)
792             {
793                 // replace <p></p> by <br><br> to preserve formatting
794                 Node node = lexer.inferredTag("br");
795                 Node.coerceNode(lexer, element, tt.tagBr);
796                 element.insertNodeAfterElement(node);
797             }
798         }
799     }
800 
801     /**
802      * This maps <em> hello </em> <strong>world </strong> to <em> hello </em> <strong>world </strong>. If last child of
803      * element is a text node then trim trailing white space character moving it to after element's end tag.
804      * @param lexer Lexer
805      * @param element node
806      * @param last last child of element
807      */
808     public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
809     {
810         byte c;
811         TagTable tt = lexer.configuration.tt;
812 
813         if (last != null && last.type == Node.TEXT_NODE)
814         {
815             if (last.end > last.start)
816 
817             {
818                 c = lexer.lexbuf[last.end - 1];
819 
820                 if (c == 160 || c == (byte) ' ')
821                 {
822                     // take care with <td> &nbsp; </td>
823                     // fix for [435920]
824                     if (c == 160 && (element.tag == tt.tagTd || element.tag == tt.tagTh))
825                     {
826                         if (last.end > last.start + 1)
827                         {
828                             last.end -= 1;
829                         }
830                     }
831                     else
832                     {
833                         last.end -= 1;
834 
835                         if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
836                             && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD))
837                         {
838                             lexer.insertspace = true;
839                         }
840                     }
841                 }
842             }
843             // if empty string then delete from parse tree
844             if (last.start == last.end) // COMMENT_NBSP_FIX: && tag != tag_td && tag != tag_th
845             {
846                 trimEmptyElement(lexer, last);
847             }
848         }
849     }
850 
851     /**
852      * Escapes the given tag.
853      * @param lexer Lexer
854      * @param element node to be escaped
855      * @return escaped node
856      */
857     protected static Node escapeTag(Lexer lexer, Node element)
858     {
859         Node node = lexer.newNode();
860         node.start = lexer.lexsize;
861         node.textarray = element.textarray; // @todo check it
862         lexer.addByte('<');
863 
864         if (element.type == END_TAG)
865         {
866             lexer.addByte('/');
867         }
868 
869         if (element.element != null)
870         {
871             lexer.addStringLiteral(element.element);
872         }
873         else if (element.type == DOCTYPE_TAG)
874         {
875             int i;
876 
877             lexer.addByte('!');
878             lexer.addByte('D');
879             lexer.addByte('O');
880             lexer.addByte('C');
881             lexer.addByte('T');
882             lexer.addByte('Y');
883             lexer.addByte('P');
884             lexer.addByte('E');
885             lexer.addByte(' ');
886 
887             for (i = element.start; i < element.end; ++i)
888             {
889                 lexer.addByte(lexer.lexbuf[i]);
890             }
891         }
892 
893         if (element.type == START_END_TAG)
894         {
895             lexer.addByte('/');
896         }
897 
898         lexer.addByte('>');
899         node.end = lexer.lexsize;
900 
901         return node;
902     }
903 
904     /**
905      * Is the node content empty or blank? Assumes node is a text node.
906      * @param lexer Lexer
907      * @return <code>true</code> if the node content empty or blank
908      */
909     public boolean isBlank(Lexer lexer)
910     {
911         if (this.type == TEXT_NODE)
912         {
913             if (this.end == this.start)
914             {
915                 return true;
916             }
917             if (this.end == this.start + 1 && lexer.lexbuf[this.end - 1] == ' ')
918             {
919                 return true;
920             }
921         }
922         return false;
923     }
924 
925     /**
926      * This maps <code>&lt;p> hello &lt;em> world &lt;/em></code> to <code>&lt;p> hello &lt;em> world &lt;/em></code>.
927      * Trims initial space, by moving it before the start tag, or if this element is the first in parent's content, then
928      * by discarding the space.
929      * @param lexer Lexer
930      * @param element parent node
931      * @param text text node
932      */
933     public static void trimInitialSpace(Lexer lexer, Node element, Node text)
934     {
935         Node prev, node;
936 
937         // #427677 - fix by Gary Peskin 31 Oct 00
938         if (text.type == TEXT_NODE && text.textarray[text.start] == (byte) ' ' && (text.start < text.end))
939         {
940             if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
941                 && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD)
942                 && element.parent.content != element)
943             {
944                 prev = element.prev;
945 
946                 if (prev != null && prev.type == TEXT_NODE)
947                 {
948                     if (prev.textarray[prev.end - 1] != (byte) ' ')
949                     {
950                         prev.textarray[prev.end++] = (byte) ' ';
951                     }
952 
953                     ++element.start;
954                 }
955                 else
956                 {
957                     // create new node
958                     node = lexer.newNode();
959                     // Local fix for bug 228486 (GLP). This handles the case
960                     // where we need to create a preceeding text node but there are
961                     // no "slots" in textarray that we can steal from the current
962                     // element. Therefore, we create a new textarray containing
963                     // just the blank. When Tidy is fixed, this should be removed.
964                     if (element.start >= element.end)
965                     {
966                         node.start = 0;
967                         node.end = 1;
968                         node.textarray = new byte[1];
969                     }
970                     else
971                     {
972                         node.start = element.start++;
973                         node.end = element.start;
974                         node.textarray = element.textarray;
975                     }
976                     node.textarray[node.start] = (byte) ' ';
977                     node.prev = prev;
978                     if (prev != null)
979                     {
980                         prev.next = node;
981                     }
982                     node.next = element;
983                     element.prev = node;
984                     node.parent = element.parent;
985                 }
986             }
987 
988             // discard the space in current node
989             ++text.start;
990         }
991     }
992 
993     /**
994      * Move initial and trailing space out. This routine maps: hello <em> world </em> to hello <em> world </em> and
995      * <em> hello </em> <strong>world </strong> to <em> hello </em> <strong>world </strong>.
996      * @param lexer Lexer
997      * @param element Node
998      */
999     public static void trimSpaces(Lexer lexer, Node element)
1000     {
1001         Node text = element.content;
1002         TagTable tt = lexer.configuration.tt;
1003 
1004         if (text != null && text.type == Node.TEXT_NODE && element.tag != tt.tagPre)
1005         {
1006             trimInitialSpace(lexer, element, text);
1007         }
1008 
1009         text = element.last;
1010 
1011         if (text != null && text.type == Node.TEXT_NODE)
1012         {
1013             trimTrailingSpace(lexer, element, text);
1014         }
1015     }
1016 
1017     /**
1018      * Is this node contained in a given tag?
1019      * @param tag descendant tag
1020      * @return <code>true</code> if node is contained in tag
1021      */
1022     public boolean isDescendantOf(Dict tag)
1023     {
1024         Node parent;
1025 
1026         for (parent = this.parent; parent != null; parent = parent.parent)
1027         {
1028             if (parent.tag == tag)
1029             {
1030                 return true;
1031             }
1032         }
1033 
1034         return false;
1035     }
1036 
1037     /**
1038      * The doctype has been found after other tags, and needs moving to before the html element.
1039      * @param lexer Lexer
1040      * @param element document
1041      * @param doctype doctype node to insert at the beginning of element
1042      */
1043     public static void insertDocType(Lexer lexer, Node element, Node doctype)
1044     {
1045         TagTable tt = lexer.configuration.tt;
1046 
1047         lexer.report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
1048 
1049         while (element.tag != tt.tagHtml)
1050         {
1051             element = element.parent;
1052         }
1053 
1054         insertNodeBeforeElement(element, doctype);
1055     }
1056 
1057     /**
1058      * Find the body node.
1059      * @param tt tag table
1060      * @return body node
1061      */
1062     public Node findBody(TagTable tt)
1063     {
1064         Node node;
1065 
1066         node = this.content;
1067 
1068         while (node != null && node.tag != tt.tagHtml)
1069         {
1070             node = node.next;
1071         }
1072 
1073         if (node == null)
1074         {
1075             return null;
1076         }
1077 
1078         node = node.content;
1079 
1080         while (node != null && node.tag != tt.tagBody && node.tag != tt.tagFrameset)
1081         {
1082             node = node.next;
1083         }
1084 
1085         if (node.tag == tt.tagFrameset)
1086         {
1087             node = node.content;
1088 
1089             while (node != null && node.tag != tt.tagNoframes)
1090             {
1091                 node = node.next;
1092             }
1093 
1094             if (node != null)
1095             {
1096                 node = node.content;
1097                 while (node != null && node.tag != tt.tagBody)
1098                 {
1099                     node = node.next;
1100                 }
1101             }
1102         }
1103 
1104         return node;
1105     }
1106 
1107     /**
1108      * Is the node an element?
1109      * @return <code>true</code> if type is START_TAG | START_END_TAG
1110      */
1111     public boolean isElement()
1112     {
1113         return (this.type == START_TAG || this.type == START_END_TAG ? true : false);
1114     }
1115 
1116     /**
1117      * Unexpected content in table row is moved to just before the table in accordance with Netscape and IE. This code
1118      * assumes that node hasn't been inserted into the row.
1119      * @param row Row node
1120      * @param node Node which should be moved before the table
1121      * @param tt tag table
1122      */
1123     public static void moveBeforeTable(Node row, Node node, TagTable tt)
1124     {
1125         Node table;
1126 
1127         /* first find the table element */
1128         for (table = row.parent; table != null; table = table.parent)
1129         {
1130             if (table.tag == tt.tagTable)
1131             {
1132                 if (table.parent.content == table)
1133                 {
1134                     table.parent.content = node;
1135                 }
1136 
1137                 node.prev = table.prev;
1138                 node.next = table;
1139                 table.prev = node;
1140                 node.parent = table.parent;
1141 
1142                 if (node.prev != null)
1143                 {
1144                     node.prev.next = node;
1145                 }
1146 
1147                 break;
1148             }
1149         }
1150     }
1151 
1152     /**
1153      * If a table row is empty then insert an empty cell.This practice is consistent with browser behavior and avoids
1154      * potential problems with row spanning cells.
1155      * @param lexer Lexer
1156      * @param row row node
1157      */
1158     public static void fixEmptyRow(Lexer lexer, Node row)
1159     {
1160         Node cell;
1161 
1162         if (row.content == null)
1163         {
1164             cell = lexer.inferredTag("td");
1165             row.insertNodeAtEnd(cell);
1166             lexer.report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
1167         }
1168     }
1169 
1170     /**
1171      * Coerce a node.
1172      * @param lexer Lexer
1173      * @param node Node
1174      * @param tag tag dictionary reference
1175      */
1176     public static void coerceNode(Lexer lexer, Node node, Dict tag)
1177     {
1178         Node tmp = lexer.inferredTag(tag.name);
1179         lexer.report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
1180         node.was = node.tag;
1181         node.tag = tag;
1182         node.type = START_TAG;
1183         node.implicit = true;
1184         node.element = tag.name;
1185     }
1186 
1187     /**
1188      * Extract this node and its children from a markup tree.
1189      */
1190     public void removeNode()
1191     {
1192         if (this.prev != null)
1193         {
1194             this.prev.next = this.next;
1195         }
1196 
1197         if (this.next != null)
1198         {
1199             this.next.prev = this.prev;
1200         }
1201 
1202         if (this.parent != null)
1203         {
1204             if (this.parent.content == this)
1205             {
1206                 this.parent.content = this.next;
1207             }
1208 
1209             if (this.parent.last == this)
1210             {
1211                 this.parent.last = this.prev;
1212             }
1213         }
1214 
1215         this.parent = null;
1216         this.prev = null;
1217         this.next = null;
1218     }
1219 
1220     /**
1221      * Insert a node at the end.
1222      * @param element parent node
1223      * @param node will be inserted at the end of element
1224      * @return <code>true</code> if the node has been inserted
1225      */
1226     public static boolean insertMisc(Node element, Node node)
1227     {
1228         if (node.type == COMMENT_TAG
1229             || node.type == PROC_INS_TAG
1230             || node.type == CDATA_TAG
1231             || node.type == SECTION_TAG
1232             || node.type == ASP_TAG
1233             || node.type == JSTE_TAG
1234             || node.type == PHP_TAG
1235             || node.type == XML_DECL)
1236         {
1237             element.insertNodeAtEnd(node);
1238             return true;
1239         }
1240 
1241         return false;
1242     }
1243 
1244     /**
1245      * Is this a new (user defined) node? Used to determine how attributes without values should be printed. This was
1246      * introduced to deal with user defined tags e.g. Cold Fusion.
1247      * @return <code>true</code> if this node represents a user-defined tag.
1248      */
1249     public boolean isNewNode()
1250     {
1251         if (this.tag != null)
1252         {
1253             return TidyUtils.toBoolean(this.tag.model & Dict.CM_NEW);
1254         }
1255 
1256         return true;
1257     }
1258 
1259     /**
1260      * Does the node have one (and only one) child?
1261      * @return <code>true</code> if the node has one child
1262      */
1263     public boolean hasOneChild()
1264     {
1265         return (this.content != null && this.content.next == null);
1266     }
1267 
1268     /**
1269      * Find the "html" element.
1270      * @param tt tag table
1271      * @return html node
1272      */
1273     public Node findHTML(TagTable tt)
1274     {
1275         Node node;
1276 
1277         for (node = this.content; node != null && node.tag != tt.tagHtml; node = node.next)
1278         {
1279             //
1280         }
1281 
1282         return node;
1283     }
1284 
1285     /**
1286      * Find the head tag.
1287      * @param tt tag table
1288      * @return head node
1289      */
1290     public Node findHEAD(TagTable tt)
1291     {
1292         Node node;
1293 
1294         node = this.findHTML(tt);
1295 
1296         if (node != null)
1297         {
1298             for (node = node.content; node != null && node.tag != tt.tagHead; node = node.next)
1299             {
1300                 //
1301             }
1302         }
1303 
1304         return node;
1305     }
1306 
1307     /**
1308      * Checks for node integrity.
1309      * @return false if node is not consistent
1310      */
1311     public boolean checkNodeIntegrity()
1312     {
1313         Node child;
1314         boolean found = false;
1315 
1316         if (this.prev != null)
1317         {
1318             if (this.prev.next != this)
1319             {
1320                 return false;
1321             }
1322         }
1323 
1324         if (this.next != null)
1325         {
1326             if (this.next.prev != this)
1327             {
1328                 return false;
1329             }
1330         }
1331 
1332         if (this.parent != null)
1333         {
1334             if (this.prev == null && this.parent.content != this)
1335             {
1336                 return false;
1337             }
1338 
1339             if (this.next == null && this.parent.last != this)
1340             {
1341                 return false;
1342             }
1343 
1344             for (child = this.parent.content; child != null; child = child.next)
1345             {
1346                 if (child == this)
1347                 {
1348                     found = true;
1349                     break;
1350                 }
1351             }
1352 
1353             if (!found)
1354             {
1355                 return false;
1356             }
1357         }
1358 
1359         for (child = this.content; child != null; child = child.next)
1360         {
1361             if (!child.checkNodeIntegrity())
1362             {
1363                 return false;
1364             }
1365         }
1366         return true;
1367     }
1368 
1369     /**
1370      * Add a css class to the node. If a class attribute already exists adds the value to the existing attribute.
1371      * @param classname css class name
1372      */
1373     public void addClass(String classname)
1374     {
1375         AttVal classattr = this.getAttrByName("class");
1376 
1377         // if there already is a class attribute then append class name after a space
1378         if (classattr != null)
1379         {
1380             classattr.value = classattr.value + " " + classname;
1381         }
1382         else
1383         {
1384             // create new class attribute
1385             this.addAttribute("class", classname);
1386         }
1387     }
1388 
1389     /**
1390      * @see java.lang.Object#toString()
1391      */
1392     public String toString()
1393     {
1394         String s = "";
1395         Node n = this;
1396 
1397         while (n != null)
1398         {
1399             s += "[Node type=";
1400             s += NODETYPE_STRING[n.type];
1401             s += ",element=";
1402             if (n.element != null)
1403             {
1404                 s += n.element;
1405             }
1406             else
1407             {
1408                 s += "null";
1409             }
1410             if (n.type == TEXT_NODE || n.type == COMMENT_TAG || n.type == PROC_INS_TAG)
1411             {
1412                 s += ",text=";
1413                 if (n.textarray != null && n.start <= n.end)
1414                 {
1415                     s += "\"";
1416                     s += TidyUtils.getString(n.textarray, n.start, n.end - n.start);
1417                     s += "\"";
1418                 }
1419                 else
1420                 {
1421                     s += "null";
1422                 }
1423             }
1424             s += ",content=";
1425             if (n.content != null)
1426             {
1427                 s += n.content.toString();
1428             }
1429             else
1430             {
1431                 s += "null";
1432             }
1433             s += "]";
1434             if (n.next != null)
1435             {
1436                 s += ",";
1437             }
1438             n = n.next;
1439         }
1440         return s;
1441     }
1442 
1443     /**
1444      * Returns a DOM Node which wrap the current tidy Node.
1445      * @return org.w3c.dom.Node instance
1446      */
1447     protected org.w3c.dom.Node getAdapter()
1448     {
1449         if (adapter == null)
1450         {
1451             switch (this.type)
1452             {
1453                 case ROOT_NODE :
1454                     adapter = new DOMDocumentImpl(this);
1455                     break;
1456                 case START_TAG :
1457                 case START_END_TAG :
1458                     adapter = new DOMElementImpl(this);
1459                     break;
1460                 case DOCTYPE_TAG :
1461                     adapter = new DOMDocumentTypeImpl(this);
1462                     break;
1463                 case COMMENT_TAG :
1464                     adapter = new DOMCommentImpl(this);
1465                     break;
1466                 case TEXT_NODE :
1467                     adapter = new DOMTextImpl(this);
1468                     break;
1469                 case CDATA_TAG :
1470                     adapter = new DOMCDATASectionImpl(this);
1471                     break;
1472                 case PROC_INS_TAG :
1473                     adapter = new DOMProcessingInstructionImpl(this);
1474                     break;
1475                 default :
1476                     adapter = new DOMNodeImpl(this);
1477             }
1478         }
1479         return adapter;
1480     }
1481 
1482     /**
1483      * Clone this node.
1484      * @param deep if true deep clone the node (also clones all the contained nodes)
1485      * @return cloned node
1486      */
1487     protected Node cloneNode(boolean deep)
1488     {
1489         Node node = (Node) this.clone();
1490         if (deep)
1491         {
1492             Node child;
1493             Node newChild;
1494             for (child = this.content; child != null; child = child.next)
1495             {
1496                 newChild = child.cloneNode(deep);
1497                 node.insertNodeAtEnd(newChild);
1498             }
1499         }
1500         return node;
1501     }
1502 
1503     /**
1504      * Setter for node type.
1505      * @param newType a valid node type constant
1506      */
1507     protected void setType(short newType)
1508     {
1509         this.type = newType;
1510     }
1511 
1512     /**
1513      * Used to check script node for script language.
1514      * @return <code>true</code> if the script node contains javascript
1515      */
1516     public boolean isJavaScript()
1517     {
1518         boolean result = false;
1519         AttVal attr;
1520 
1521         if (this.attributes == null)
1522         {
1523             return true;
1524         }
1525 
1526         for (attr = this.attributes; attr != null; attr = attr.next)
1527         {
1528             if (("language".equalsIgnoreCase(attr.attribute) || "type".equalsIgnoreCase(attr.attribute))
1529                 && "javascript".equalsIgnoreCase(attr.value))
1530             {
1531                 result = true;
1532             }
1533         }
1534 
1535         return result;
1536     }
1537 
1538     /**
1539      * Does the node expect contents?
1540      * @return <code>false</code> if this node should be empty
1541      */
1542     public boolean expectsContent()
1543     {
1544         if (this.type != Node.START_TAG)
1545         {
1546             return false;
1547         }
1548 
1549         // unknown element?
1550         if (this.tag == null)
1551         {
1552             return true;
1553         }
1554 
1555         if (TidyUtils.toBoolean(this.tag.model & Dict.CM_EMPTY))
1556         {
1557             return false;
1558         }
1559 
1560         return true;
1561     }
1562 }