1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 /**
 * Used for elements and text nodes. The element name is null for text nodes. Start and end are offsets into lexbuf,
 * which contains the textual content of all elements in the parse tree. Parent and content allow traversal of the
 * parse tree in any direction. Attributes are represented as a linked list of AttVal nodes which hold the strings for
 * attribute/value pairs.
61 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
62 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
63 * @author Fabrizio Giustina
64 * @version $Revision: 779 $ ($Author: fgiust $)
65 */
66 public class Node implements Cloneable
67 {
68
/**
 * node type: root.
 */
public static final short ROOT_NODE = 0;

/**
 * node type: doctype.
 */
public static final short DOCTYPE_TAG = 1;

/**
 * node type: comment.
 */
public static final short COMMENT_TAG = 2;

/**
 * node type: processing instruction.
 */
public static final short PROC_INS_TAG = 3;

/**
 * node type: text.
 */
public static final short TEXT_NODE = 4;

/**
 * node type: start tag.
 */
public static final short START_TAG = 5;

/**
 * node type: end tag.
 */
public static final short END_TAG = 6;

/**
 * node type: start tag which is closed immediately (escapeTag writes it with a '/' right before the '&gt;').
 */
public static final short START_END_TAG = 7;

/**
 * node type: CDATA.
 */
public static final short CDATA_TAG = 8;

/**
 * node type: section tag.
 */
public static final short SECTION_TAG = 9;

/**
 * node type: asp tag.
 */
public static final short ASP_TAG = 10;

/**
 * node type: jste tag.
 */
public static final short JSTE_TAG = 11;

/**
 * node type: php tag.
 */
public static final short PHP_TAG = 12;

/**
 * node type: xml declaration.
 */
public static final short XML_DECL = 13;
138
139 /**
140 * Description for all the node types. Used in toString.
141 */
142 private static final String[] NODETYPE_STRING = {
143 "RootNode",
144 "DocTypeTag",
145 "CommentTag",
146 "ProcInsTag",
147 "TextNode",
148 "StartTag",
149 "EndTag",
150 "StartEndTag",
151 "SectionTag",
152 "AspTag",
153 "PhpTag",
154 "XmlDecl"};
155
/**
 * parent node.
 */
protected Node parent;

/**
 * previous sibling node.
 */
protected Node prev;

/**
 * next sibling node.
 */
protected Node next;

/**
 * last child node (kept up to date by insertNodeAtEnd and removeNode).
 */
protected Node last;

/**
 * start of span onto text array.
 */
protected int start;

/**
 * end of span onto text array.
 */
protected int end;

/**
 * the text array.
 */
protected byte[] textarray;

/**
 * TextNode, StartTag, EndTag etc. One of the node type constants declared in this class.
 */
protected short type;

/**
 * true if closed by explicit end tag.
 */
protected boolean closed;

/**
 * true if inferred.
 */
protected boolean implicit;

/**
 * true if followed by a line break.
 */
protected boolean linebreak;

/**
 * old tag when it was changed (set by coerceNode).
 */
protected Dict was;

/**
 * tag's dictionary definition.
 */
protected Dict tag;

/**
 * Tag name. Null for text nodes.
 */
protected String element;

/**
 * Attribute/Value linked list (head of the list).
 */
protected AttVal attributes;

/**
 * First contained (child) node; the other children are reached through their next links.
 */
protected Node content;

/**
 * DOM adapter, created on first use by getAdapter.
 */
protected org.w3c.dom.Node adapter;
240
/**
 * Creates an empty text node: type is TEXT_NODE, there is no text array and the span is 0..0.
 */
public Node()
{
    this(TEXT_NODE, null, 0, 0);
}
248
/**
 * Creates a detached node of the given type spanning <code>start</code> to <code>end</code> of
 * <code>textarray</code>. The node has no parent, siblings, children, tag, element name or attributes, and all of
 * its flags are false.
 * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE |
 * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node.ASP_TAG |
 * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
 * @param textarray array of bytes contained in the Node
 * @param start start position
 * @param end end position
 */
public Node(short type, byte[] textarray, int start, int end)
{
    // what the caller told us about the node
    this.type = type;
    this.textarray = textarray;
    this.start = start;
    this.end = end;

    // tree links: the node starts out detached
    this.parent = null;
    this.prev = null;
    this.next = null;
    this.content = null;
    this.last = null;

    // tag information
    this.element = null;
    this.tag = null;
    this.was = null;
    this.attributes = null;

    // flags
    this.closed = false;
    this.implicit = false;
    this.linebreak = false;
}
277
/**
 * Creates a detached, named node. For tag nodes (START_TAG, START_END_TAG, END_TAG) the node is additionally
 * handed to <code>tt.findTag</code>, presumably to resolve its dictionary entry from the element name.
 * @param type node type: Node.ROOT_NODE | Node.DOCTYPE_TAG | Node.COMMENT_TAG | Node.PROC_INS_TAG | Node.TEXT_NODE |
 * Node.START_TAG | Node.END_TAG | Node.START_END_TAG | Node.CDATA_TAG | Node.SECTION_TAG | Node.ASP_TAG |
 * Node.JSTE_TAG | Node.PHP_TAG | Node.XML_DECL
 * @param textarray array of bytes contained in the Node
 * @param start start position
 * @param end end position
 * @param element tag name
 * @param tt tag table instance; only used (and then required to be non-null) for tag nodes
 */
public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt)
{
    // the shared part of the initialisation lives in the four argument constructor
    this(type, textarray, start, end);
    this.element = element;

    switch (type)
    {
        case START_TAG :
        case START_END_TAG :
        case END_TAG :
            tt.findTag(this);
            break;
        default :
            // non-tag nodes are not looked up in the tag table
            break;
    }
}
312
313 /**
314 * Used to clone heading nodes when split by an hr.
315 * @see java.lang.Object#clone()
316 */
317 protected Object clone()
318 {
319 Node node;
320 try
321 {
322 node = (Node) super.clone();
323 }
324 catch (CloneNotSupportedException e)
325 {
326
327 throw new RuntimeException("CloneNotSupportedException " + e.getMessage());
328 }
329 if (this.textarray != null)
330 {
331 node.textarray = new byte[this.end - this.start];
332 node.start = 0;
333 node.end = this.end - this.start;
334 if (node.end > 0)
335 {
336 System.arraycopy(this.textarray, this.start, node.textarray, node.start, node.end);
337 }
338 }
339 if (this.attributes != null)
340 {
341 node.attributes = (AttVal) this.attributes.clone();
342 }
343 return node;
344 }
345
346 /**
347 * Returns an attribute with the given name in the current node.
348 * @param name attribute name.
349 * @return AttVal instance or null if no attribute with the iven name is found
350 */
351 public AttVal getAttrByName(String name)
352 {
353 AttVal attr;
354
355 for (attr = this.attributes; attr != null; attr = attr.next)
356 {
357 if (name != null && attr.attribute != null && attr.attribute.equals(name))
358 {
359 break;
360 }
361 }
362
363 return attr;
364 }
365
366 /**
367 * Default method for checking an element's attributes.
368 * @param lexer Lexer
369 */
370 public void checkAttributes(Lexer lexer)
371 {
372 AttVal attval;
373
374 for (attval = this.attributes; attval != null; attval = attval.next)
375 {
376 attval.checkAttribute(lexer, this);
377 }
378 }
379
/**
 * The same attribute name can't be used more than once in each element. Discard or join attributes according to
 * configuration.
 * <p>
 * Each attribute (<code>attval</code>) is compared, ignoring case, with every attribute after it in the list
 * (<code>current</code>). Attributes carrying asp or php content are never compared. For a pair with the same name:
 * </p>
 * <ul>
 * <li>"class" with <code>joinClasses</code> set: the values are joined into <code>current</code> and
 * <code>attval</code> is removed;</li>
 * <li>"style" with <code>joinStyles</code> set: the values are joined into <code>current</code> and
 * <code>attval</code> is removed;</li>
 * <li><code>duplicateAttrs == Configuration.KEEP_LAST</code>: <code>current</code> is removed;</li>
 * <li>otherwise: <code>attval</code> is removed.</li>
 * </ul>
 * @param lexer Lexer, used for its configuration and to report the repaired attributes
 */
public void repairDuplicateAttributes(Lexer lexer)
{
    AttVal attval;

    // no increment in the loop header: attval is advanced inside the body
    for (attval = this.attributes; attval != null;)
    {
        if (attval.asp == null && attval.php == null)
        {
            AttVal current;

            // no increment in the loop header: every branch below decides how current moves on
            for (current = attval.next; current != null;)
            {
                if (current.asp == null
                    && current.php == null
                    && attval.attribute != null
                    && attval.attribute.equalsIgnoreCase(current.attribute))
                {
                    AttVal temp;

                    if ("class".equalsIgnoreCase(current.attribute) && lexer.configuration.joinClasses)
                    {
                        // concatenate the classes, the value of current comes first
                        current.value = current.value + " " + attval.value;

                        // attval is about to be removed, comparison continues from its successor
                        temp = attval.next;

                        if (temp.next == null)
                        {
                            current = null;
                        }
                        else
                        {
                            current = current.next;
                        }

                        lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE);

                        removeAttribute(attval);
                        attval = temp;
                    }
                    else if ("style".equalsIgnoreCase(current.attribute) && lexer.configuration.joinStyles)
                    {
                        // concatenate the styles; the separator depends on the last character of current's value
                        // NOTE(review): charAt(end) fails when current.value is null or empty - confirm that the
                        // lexer never produces such a style attribute
                        int end = current.value.length() - 1;

                        if (current.value.charAt(end) == ';')
                        {
                            // already terminated by a semicolon: a space is enough
                            current.value = current.value + " " + attval.value;
                        }
                        else if (current.value.charAt(end) == '}')
                        {
                            // ends with a closing brace: wrap the appended value in braces as well
                            current.value = current.value + " { " + attval.value + " }";
                        }
                        else
                        {
                            // plain declaration list: separate with a semicolon
                            current.value = current.value + "; " + attval.value;
                        }

                        temp = attval.next;

                        if (temp.next == null)
                        {
                            current = null;
                        }
                        else
                        {
                            current = current.next;
                        }

                        lexer.report.attrError(lexer, this, attval, Report.JOINING_ATTRIBUTE);

                        removeAttribute(attval);
                        attval = temp;

                    }
                    else if (lexer.configuration.duplicateAttrs == Configuration.KEEP_LAST)
                    {
                        // drop the entry found further down the list and keep comparing attval
                        // NOTE(review): which occurrence this is in document order depends on how the lexer
                        // builds the attribute list - verify against the list construction
                        temp = current.next;

                        lexer.report.attrError(lexer, this, current, Report.REPEATED_ATTRIBUTE);

                        removeAttribute(current);
                        current = temp;
                    }
                    else
                    {
                        // drop attval itself and continue with its successor
                        temp = attval.next;

                        if (attval.next == null)
                        {
                            current = null;
                        }
                        else
                        {
                            current = current.next;
                        }

                        lexer.report.attrError(lexer, this, attval, Report.REPEATED_ATTRIBUTE);

                        removeAttribute(attval);
                        attval = temp;
                    }
                }
                else
                {
                    current = current.next;
                }
            }
            // NOTE(review): when attval was replaced by its successor above, this step moves on once more, so
            // that successor is only compared with the entries following the old position of current - confirm
            // this is intended
            attval = attval.next;
        }
        else
        {
            attval = attval.next;
        }
    }
}
508
509 /**
510 * Adds an attribute to the node.
511 * @param name attribute name
512 * @param value attribute value
513 */
514 public void addAttribute(String name, String value)
515 {
516 AttVal av = new AttVal(null, null, null, null, '"', name, value);
517 av.dict = AttributeTable.getDefaultAttributeTable().findAttribute(av);
518
519 if (this.attributes == null)
520 {
521 this.attributes = av;
522 }
523 else
524 {
525
526 AttVal here = this.attributes;
527
528 while (here.next != null)
529 {
530 here = here.next;
531 }
532
533 here.next = av;
534 }
535 }
536
537 /**
538 * Remove an attribute from node and then free it.
539 * @param attr attribute to remove
540 */
541 public void removeAttribute(AttVal attr)
542 {
543 AttVal av;
544 AttVal prev = null;
545 AttVal next;
546
547 for (av = this.attributes; av != null; av = next)
548 {
549 next = av.next;
550
551 if (av == attr)
552 {
553 if (prev != null)
554 {
555 prev.next = next;
556 }
557 else
558 {
559 this.attributes = next;
560 }
561 }
562 else
563 {
564 prev = av;
565 }
566 }
567 }
568
569 /**
570 * Find the doctype element.
571 * @return doctype node or null if not found
572 */
573 public Node findDocType()
574 {
575 Node node = this.content;
576
577 while (node != null && node.type != DOCTYPE_TAG)
578 {
579 node = node.next;
580 }
581
582 return node;
583 }
584
585 /**
586 * Discard the doctype node.
587 */
588 public void discardDocType()
589 {
590 Node node;
591
592 node = findDocType();
593 if (node != null)
594 {
595 if (node.prev != null)
596 {
597 node.prev.next = node.next;
598 }
599 else
600 {
601 node.parent.content = node.next;
602 }
603
604 if (node.next != null)
605 {
606 node.next.prev = node.prev;
607 }
608
609 node.next = null;
610 }
611 }
612
613 /**
614 * Remove node from markup tree and discard it.
615 * @param element discarded node
616 * @return next node
617 */
618 public static Node discardElement(Node element)
619 {
620 Node next = null;
621
622 if (element != null)
623 {
624 next = element.next;
625 element.removeNode();
626 }
627
628 return next;
629 }
630
631 /**
632 * Insert a node into markup tree.
633 * @param node to insert
634 */
635 public void insertNodeAtStart(Node node)
636 {
637 node.parent = this;
638
639 if (this.content == null)
640 {
641 this.last = node;
642 }
643 else
644 {
645 this.content.prev = node;
646 }
647
648 node.next = this.content;
649 node.prev = null;
650 this.content = node;
651 }
652
653 /**
654 * Insert node into markup tree.
655 * @param node Node to insert
656 */
657 public void insertNodeAtEnd(Node node)
658 {
659 node.parent = this;
660 node.prev = this.last;
661
662 if (this.last != null)
663 {
664 this.last.next = node;
665 }
666 else
667 {
668 this.content = node;
669 }
670
671 this.last = node;
672 }
673
674 /**
675 * Insert node into markup tree in pace of element which is moved to become the child of the node.
676 * @param element child node. Will be inserted as a child of element
677 * @param node parent node
678 */
679 public static void insertNodeAsParent(Node element, Node node)
680 {
681 node.content = element;
682 node.last = element;
683 node.parent = element.parent;
684 element.parent = node;
685
686 if (node.parent.content == element)
687 {
688 node.parent.content = node;
689 }
690
691 if (node.parent.last == element)
692 {
693 node.parent.last = node;
694 }
695
696 node.prev = element.prev;
697 element.prev = null;
698
699 if (node.prev != null)
700 {
701 node.prev.next = node;
702 }
703
704 node.next = element.next;
705 element.next = null;
706
707 if (node.next != null)
708 {
709 node.next.prev = node;
710 }
711 }
712
713 /**
714 * Insert node into markup tree before element.
715 * @param element child node. Will be insertedbefore element
716 * @param node following node
717 */
718 public static void insertNodeBeforeElement(Node element, Node node)
719 {
720 Node parent;
721
722 parent = element.parent;
723 node.parent = parent;
724 node.next = element;
725 node.prev = element.prev;
726 element.prev = node;
727
728 if (node.prev != null)
729 {
730 node.prev.next = node;
731 }
732
733 if (parent != null && parent.content == element)
734 {
735 parent.content = node;
736 }
737 }
738
739 /**
740 * Insert node into markup tree after element.
741 * @param node new node to insert
742 */
743 public void insertNodeAfterElement(Node node)
744 {
745 Node parent;
746
747 parent = this.parent;
748 node.parent = parent;
749
750
751 if (parent != null && parent.last == this)
752 {
753 parent.last = node;
754 }
755 else
756 {
757 node.next = this.next;
758
759 if (node.next != null)
760 {
761 node.next.prev = node;
762 }
763 }
764
765 this.next = node;
766 node.prev = this;
767 }
768
769 /**
770 * Trim an empty element.
771 * @param lexer Lexer
772 * @param element empty node to be removed
773 */
774 public static void trimEmptyElement(Lexer lexer, Node element)
775 {
776
777
778 if (lexer.configuration.trimEmpty)
779 {
780 TagTable tt = lexer.configuration.tt;
781
782 if (lexer.canPrune(element))
783 {
784 if (element.type != TEXT_NODE)
785 {
786 lexer.report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
787 }
788
789 discardElement(element);
790 }
791 else if (element.tag == tt.tagP && element.content == null)
792 {
793
794 Node node = lexer.inferredTag("br");
795 Node.coerceNode(lexer, element, tt.tagBr);
796 element.insertNodeAfterElement(node);
797 }
798 }
799 }
800
/**
 * Trims a trailing space from the last child of an element, e.g. for
 * <code>&lt;em&gt;hello &lt;/em&gt;&lt;strong&gt;world&lt;/strong&gt;</code>. If the last child of element is a text
 * node whose last byte is a space, the span of the text node is shortened by one byte; for inline elements which
 * are not form fields <code>lexer.insertspace</code> is set, presumably so that the space is emitted again after
 * the end tag of the element. A text node left empty is passed to trimEmptyElement.
 * @param lexer Lexer
 * @param element node
 * @param last last child of element
 */
public static void trimTrailingSpace(Lexer lexer, Node element, Node last)
{
    byte c;
    TagTable tt = lexer.configuration.tt;

    if (last != null && last.type == Node.TEXT_NODE)
    {
        if (last.end > last.start)

        {
            // last byte of the text span, read from the lexer buffer
            c = lexer.lexbuf[last.end - 1];

            // NOTE(review): c is a signed byte, so "c == 160" can never be true and only the plain space is
            // handled here. Do not simply mask the byte with 0xFF: 0xA0 may also be the last byte of a longer
            // multi-byte sequence (assuming lexbuf holds UTF-8 - confirm), which would then be cut in half.
            if (c == 160 || c == (byte) ' ')
            {
                // take care with non breaking spaces in table cells (see the note above: currently unreachable)
                if (c == 160 && (element.tag == tt.tagTd || element.tag == tt.tagTh))
                {
                    // keep the character when it is the only content of the cell
                    if (last.end > last.start + 1)
                    {
                        last.end -= 1;
                    }
                }
                else
                {
                    last.end -= 1;

                    // element.tag is dereferenced here: callers must pass an element with a dictionary entry
                    if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
                        && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD))
                    {
                        lexer.insertspace = true;
                    }
                }
            }
        }

        // if empty string then delete from parse tree
        if (last.start == last.end)
        {
            trimEmptyElement(lexer, last);
        }
    }
}
850
851 /**
852 * Escapes the given tag.
853 * @param lexer Lexer
854 * @param element node to be escaped
855 * @return escaped node
856 */
857 protected static Node escapeTag(Lexer lexer, Node element)
858 {
859 Node node = lexer.newNode();
860 node.start = lexer.lexsize;
861 node.textarray = element.textarray;
862 lexer.addByte('<');
863
864 if (element.type == END_TAG)
865 {
866 lexer.addByte('/');
867 }
868
869 if (element.element != null)
870 {
871 lexer.addStringLiteral(element.element);
872 }
873 else if (element.type == DOCTYPE_TAG)
874 {
875 int i;
876
877 lexer.addByte('!');
878 lexer.addByte('D');
879 lexer.addByte('O');
880 lexer.addByte('C');
881 lexer.addByte('T');
882 lexer.addByte('Y');
883 lexer.addByte('P');
884 lexer.addByte('E');
885 lexer.addByte(' ');
886
887 for (i = element.start; i < element.end; ++i)
888 {
889 lexer.addByte(lexer.lexbuf[i]);
890 }
891 }
892
893 if (element.type == START_END_TAG)
894 {
895 lexer.addByte('/');
896 }
897
898 lexer.addByte('>');
899 node.end = lexer.lexsize;
900
901 return node;
902 }
903
904 /**
905 * Is the node content empty or blank? Assumes node is a text node.
906 * @param lexer Lexer
907 * @return <code>true</code> if the node content empty or blank
908 */
909 public boolean isBlank(Lexer lexer)
910 {
911 if (this.type == TEXT_NODE)
912 {
913 if (this.end == this.start)
914 {
915 return true;
916 }
917 if (this.end == this.start + 1 && lexer.lexbuf[this.end - 1] == ' ')
918 {
919 return true;
920 }
921 }
922 return false;
923 }
924
925 /**
926 * This maps <code><p> hello <em> world </em></code> to <code><p> hello <em> world </em></code>.
927 * Trims initial space, by moving it before the start tag, or if this element is the first in parent's content, then
928 * by discarding the space.
929 * @param lexer Lexer
930 * @param element parent node
931 * @param text text node
932 */
933 public static void trimInitialSpace(Lexer lexer, Node element, Node text)
934 {
935 Node prev, node;
936
937
938 if (text.type == TEXT_NODE && text.textarray[text.start] == (byte) ' ' && (text.start < text.end))
939 {
940 if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
941 && !TidyUtils.toBoolean(element.tag.model & Dict.CM_FIELD)
942 && element.parent.content != element)
943 {
944 prev = element.prev;
945
946 if (prev != null && prev.type == TEXT_NODE)
947 {
948 if (prev.textarray[prev.end - 1] != (byte) ' ')
949 {
950 prev.textarray[prev.end++] = (byte) ' ';
951 }
952
953 ++element.start;
954 }
955 else
956 {
957
958 node = lexer.newNode();
959
960
961
962
963
964 if (element.start >= element.end)
965 {
966 node.start = 0;
967 node.end = 1;
968 node.textarray = new byte[1];
969 }
970 else
971 {
972 node.start = element.start++;
973 node.end = element.start;
974 node.textarray = element.textarray;
975 }
976 node.textarray[node.start] = (byte) ' ';
977 node.prev = prev;
978 if (prev != null)
979 {
980 prev.next = node;
981 }
982 node.next = element;
983 element.prev = node;
984 node.parent = element.parent;
985 }
986 }
987
988
989 ++text.start;
990 }
991 }
992
993 /**
994 * Move initial and trailing space out. This routine maps: hello <em> world </em> to hello <em> world </em> and
995 * <em> hello </em> <strong>world </strong> to <em> hello </em> <strong>world </strong>.
996 * @param lexer Lexer
997 * @param element Node
998 */
999 public static void trimSpaces(Lexer lexer, Node element)
1000 {
1001 Node text = element.content;
1002 TagTable tt = lexer.configuration.tt;
1003
1004 if (text != null && text.type == Node.TEXT_NODE && element.tag != tt.tagPre)
1005 {
1006 trimInitialSpace(lexer, element, text);
1007 }
1008
1009 text = element.last;
1010
1011 if (text != null && text.type == Node.TEXT_NODE)
1012 {
1013 trimTrailingSpace(lexer, element, text);
1014 }
1015 }
1016
1017 /**
1018 * Is this node contained in a given tag?
1019 * @param tag descendant tag
1020 * @return <code>true</code> if node is contained in tag
1021 */
1022 public boolean isDescendantOf(Dict tag)
1023 {
1024 Node parent;
1025
1026 for (parent = this.parent; parent != null; parent = parent.parent)
1027 {
1028 if (parent.tag == tag)
1029 {
1030 return true;
1031 }
1032 }
1033
1034 return false;
1035 }
1036
1037 /**
1038 * The doctype has been found after other tags, and needs moving to before the html element.
1039 * @param lexer Lexer
1040 * @param element document
1041 * @param doctype doctype node to insert at the beginning of element
1042 */
1043 public static void insertDocType(Lexer lexer, Node element, Node doctype)
1044 {
1045 TagTable tt = lexer.configuration.tt;
1046
1047 lexer.report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);
1048
1049 while (element.tag != tt.tagHtml)
1050 {
1051 element = element.parent;
1052 }
1053
1054 insertNodeBeforeElement(element, doctype);
1055 }
1056
1057 /**
1058 * Find the body node.
1059 * @param tt tag table
1060 * @return body node
1061 */
1062 public Node findBody(TagTable tt)
1063 {
1064 Node node;
1065
1066 node = this.content;
1067
1068 while (node != null && node.tag != tt.tagHtml)
1069 {
1070 node = node.next;
1071 }
1072
1073 if (node == null)
1074 {
1075 return null;
1076 }
1077
1078 node = node.content;
1079
1080 while (node != null && node.tag != tt.tagBody && node.tag != tt.tagFrameset)
1081 {
1082 node = node.next;
1083 }
1084
1085 if (node.tag == tt.tagFrameset)
1086 {
1087 node = node.content;
1088
1089 while (node != null && node.tag != tt.tagNoframes)
1090 {
1091 node = node.next;
1092 }
1093
1094 if (node != null)
1095 {
1096 node = node.content;
1097 while (node != null && node.tag != tt.tagBody)
1098 {
1099 node = node.next;
1100 }
1101 }
1102 }
1103
1104 return node;
1105 }
1106
1107 /**
1108 * Is the node an element?
1109 * @return <code>true</code> if type is START_TAG | START_END_TAG
1110 */
1111 public boolean isElement()
1112 {
1113 return (this.type == START_TAG || this.type == START_END_TAG ? true : false);
1114 }
1115
1116 /**
1117 * Unexpected content in table row is moved to just before the table in accordance with Netscape and IE. This code
1118 * assumes that node hasn't been inserted into the row.
1119 * @param row Row node
1120 * @param node Node which should be moved before the table
1121 * @param tt tag table
1122 */
1123 public static void moveBeforeTable(Node row, Node node, TagTable tt)
1124 {
1125 Node table;
1126
1127
1128 for (table = row.parent; table != null; table = table.parent)
1129 {
1130 if (table.tag == tt.tagTable)
1131 {
1132 if (table.parent.content == table)
1133 {
1134 table.parent.content = node;
1135 }
1136
1137 node.prev = table.prev;
1138 node.next = table;
1139 table.prev = node;
1140 node.parent = table.parent;
1141
1142 if (node.prev != null)
1143 {
1144 node.prev.next = node;
1145 }
1146
1147 break;
1148 }
1149 }
1150 }
1151
1152 /**
1153 * If a table row is empty then insert an empty cell.This practice is consistent with browser behavior and avoids
1154 * potential problems with row spanning cells.
1155 * @param lexer Lexer
1156 * @param row row node
1157 */
1158 public static void fixEmptyRow(Lexer lexer, Node row)
1159 {
1160 Node cell;
1161
1162 if (row.content == null)
1163 {
1164 cell = lexer.inferredTag("td");
1165 row.insertNodeAtEnd(cell);
1166 lexer.report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
1167 }
1168 }
1169
1170 /**
1171 * Coerce a node.
1172 * @param lexer Lexer
1173 * @param node Node
1174 * @param tag tag dictionary reference
1175 */
1176 public static void coerceNode(Lexer lexer, Node node, Dict tag)
1177 {
1178 Node tmp = lexer.inferredTag(tag.name);
1179 lexer.report.warning(lexer, node, tmp, Report.OBSOLETE_ELEMENT);
1180 node.was = node.tag;
1181 node.tag = tag;
1182 node.type = START_TAG;
1183 node.implicit = true;
1184 node.element = tag.name;
1185 }
1186
1187 /**
1188 * Extract this node and its children from a markup tree.
1189 */
1190 public void removeNode()
1191 {
1192 if (this.prev != null)
1193 {
1194 this.prev.next = this.next;
1195 }
1196
1197 if (this.next != null)
1198 {
1199 this.next.prev = this.prev;
1200 }
1201
1202 if (this.parent != null)
1203 {
1204 if (this.parent.content == this)
1205 {
1206 this.parent.content = this.next;
1207 }
1208
1209 if (this.parent.last == this)
1210 {
1211 this.parent.last = this.prev;
1212 }
1213 }
1214
1215 this.parent = null;
1216 this.prev = null;
1217 this.next = null;
1218 }
1219
1220 /**
1221 * Insert a node at the end.
1222 * @param element parent node
1223 * @param node will be inserted at the end of element
1224 * @return <code>true</code> if the node has been inserted
1225 */
1226 public static boolean insertMisc(Node element, Node node)
1227 {
1228 if (node.type == COMMENT_TAG
1229 || node.type == PROC_INS_TAG
1230 || node.type == CDATA_TAG
1231 || node.type == SECTION_TAG
1232 || node.type == ASP_TAG
1233 || node.type == JSTE_TAG
1234 || node.type == PHP_TAG
1235 || node.type == XML_DECL)
1236 {
1237 element.insertNodeAtEnd(node);
1238 return true;
1239 }
1240
1241 return false;
1242 }
1243
1244 /**
1245 * Is this a new (user defined) node? Used to determine how attributes without values should be printed. This was
1246 * introduced to deal with user defined tags e.g. Cold Fusion.
1247 * @return <code>true</code> if this node represents a user-defined tag.
1248 */
1249 public boolean isNewNode()
1250 {
1251 if (this.tag != null)
1252 {
1253 return TidyUtils.toBoolean(this.tag.model & Dict.CM_NEW);
1254 }
1255
1256 return true;
1257 }
1258
1259 /**
1260 * Does the node have one (and only one) child?
1261 * @return <code>true</code> if the node has one child
1262 */
1263 public boolean hasOneChild()
1264 {
1265 return (this.content != null && this.content.next == null);
1266 }
1267
1268 /**
1269 * Find the "html" element.
1270 * @param tt tag table
1271 * @return html node
1272 */
1273 public Node findHTML(TagTable tt)
1274 {
1275 Node node;
1276
1277 for (node = this.content; node != null && node.tag != tt.tagHtml; node = node.next)
1278 {
1279
1280 }
1281
1282 return node;
1283 }
1284
1285 /**
1286 * Find the head tag.
1287 * @param tt tag table
1288 * @return head node
1289 */
1290 public Node findHEAD(TagTable tt)
1291 {
1292 Node node;
1293
1294 node = this.findHTML(tt);
1295
1296 if (node != null)
1297 {
1298 for (node = node.content; node != null && node.tag != tt.tagHead; node = node.next)
1299 {
1300
1301 }
1302 }
1303
1304 return node;
1305 }
1306
1307 /**
1308 * Checks for node integrity.
1309 * @return false if node is not consistent
1310 */
1311 public boolean checkNodeIntegrity()
1312 {
1313 Node child;
1314 boolean found = false;
1315
1316 if (this.prev != null)
1317 {
1318 if (this.prev.next != this)
1319 {
1320 return false;
1321 }
1322 }
1323
1324 if (this.next != null)
1325 {
1326 if (this.next.prev != this)
1327 {
1328 return false;
1329 }
1330 }
1331
1332 if (this.parent != null)
1333 {
1334 if (this.prev == null && this.parent.content != this)
1335 {
1336 return false;
1337 }
1338
1339 if (this.next == null && this.parent.last != this)
1340 {
1341 return false;
1342 }
1343
1344 for (child = this.parent.content; child != null; child = child.next)
1345 {
1346 if (child == this)
1347 {
1348 found = true;
1349 break;
1350 }
1351 }
1352
1353 if (!found)
1354 {
1355 return false;
1356 }
1357 }
1358
1359 for (child = this.content; child != null; child = child.next)
1360 {
1361 if (!child.checkNodeIntegrity())
1362 {
1363 return false;
1364 }
1365 }
1366 return true;
1367 }
1368
1369 /**
1370 * Add a css class to the node. If a class attribute already exists adds the value to the existing attribute.
1371 * @param classname css class name
1372 */
1373 public void addClass(String classname)
1374 {
1375 AttVal classattr = this.getAttrByName("class");
1376
1377
1378 if (classattr != null)
1379 {
1380 classattr.value = classattr.value + " " + classname;
1381 }
1382 else
1383 {
1384
1385 this.addAttribute("class", classname);
1386 }
1387 }
1388
1389 /**
1390 * @see java.lang.Object#toString()
1391 */
1392 public String toString()
1393 {
1394 String s = "";
1395 Node n = this;
1396
1397 while (n != null)
1398 {
1399 s += "[Node type=";
1400 s += NODETYPE_STRING[n.type];
1401 s += ",element=";
1402 if (n.element != null)
1403 {
1404 s += n.element;
1405 }
1406 else
1407 {
1408 s += "null";
1409 }
1410 if (n.type == TEXT_NODE || n.type == COMMENT_TAG || n.type == PROC_INS_TAG)
1411 {
1412 s += ",text=";
1413 if (n.textarray != null && n.start <= n.end)
1414 {
1415 s += "\"";
1416 s += TidyUtils.getString(n.textarray, n.start, n.end - n.start);
1417 s += "\"";
1418 }
1419 else
1420 {
1421 s += "null";
1422 }
1423 }
1424 s += ",content=";
1425 if (n.content != null)
1426 {
1427 s += n.content.toString();
1428 }
1429 else
1430 {
1431 s += "null";
1432 }
1433 s += "]";
1434 if (n.next != null)
1435 {
1436 s += ",";
1437 }
1438 n = n.next;
1439 }
1440 return s;
1441 }
1442
1443 /**
1444 * Returns a DOM Node which wrap the current tidy Node.
1445 * @return org.w3c.dom.Node instance
1446 */
1447 protected org.w3c.dom.Node getAdapter()
1448 {
1449 if (adapter == null)
1450 {
1451 switch (this.type)
1452 {
1453 case ROOT_NODE :
1454 adapter = new DOMDocumentImpl(this);
1455 break;
1456 case START_TAG :
1457 case START_END_TAG :
1458 adapter = new DOMElementImpl(this);
1459 break;
1460 case DOCTYPE_TAG :
1461 adapter = new DOMDocumentTypeImpl(this);
1462 break;
1463 case COMMENT_TAG :
1464 adapter = new DOMCommentImpl(this);
1465 break;
1466 case TEXT_NODE :
1467 adapter = new DOMTextImpl(this);
1468 break;
1469 case CDATA_TAG :
1470 adapter = new DOMCDATASectionImpl(this);
1471 break;
1472 case PROC_INS_TAG :
1473 adapter = new DOMProcessingInstructionImpl(this);
1474 break;
1475 default :
1476 adapter = new DOMNodeImpl(this);
1477 }
1478 }
1479 return adapter;
1480 }
1481
1482 /**
1483 * Clone this node.
1484 * @param deep if true deep clone the node (also clones all the contained nodes)
1485 * @return cloned node
1486 */
1487 protected Node cloneNode(boolean deep)
1488 {
1489 Node node = (Node) this.clone();
1490 if (deep)
1491 {
1492 Node child;
1493 Node newChild;
1494 for (child = this.content; child != null; child = child.next)
1495 {
1496 newChild = child.cloneNode(deep);
1497 node.insertNodeAtEnd(newChild);
1498 }
1499 }
1500 return node;
1501 }
1502
1503 /**
1504 * Setter for node type.
1505 * @param newType a valid node type constant
1506 */
1507 protected void setType(short newType)
1508 {
1509 this.type = newType;
1510 }
1511
1512 /**
1513 * Used to check script node for script language.
1514 * @return <code>true</code> if the script node contains javascript
1515 */
1516 public boolean isJavaScript()
1517 {
1518 boolean result = false;
1519 AttVal attr;
1520
1521 if (this.attributes == null)
1522 {
1523 return true;
1524 }
1525
1526 for (attr = this.attributes; attr != null; attr = attr.next)
1527 {
1528 if (("language".equalsIgnoreCase(attr.attribute) || "type".equalsIgnoreCase(attr.attribute))
1529 && "javascript".equalsIgnoreCase(attr.value))
1530 {
1531 result = true;
1532 }
1533 }
1534
1535 return result;
1536 }
1537
1538 /**
1539 * Does the node expect contents?
1540 * @return <code>false</code> if this node should be empty
1541 */
1542 public boolean expectsContent()
1543 {
1544 if (this.type != Node.START_TAG)
1545 {
1546 return false;
1547 }
1548
1549
1550 if (this.tag == null)
1551 {
1552 return true;
1553 }
1554
1555 if (TidyUtils.toBoolean(this.tag.model & Dict.CM_EMPTY))
1556 {
1557 return false;
1558 }
1559
1560 return true;
1561 }
1562 }