View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  /**
57   * Check HTML attributes implementation.
58   * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
59   * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
60   * @author Fabrizio Giustina
61   * @version $Revision: 779 $ ($Author: fgiust $)
62   */
63  public final class TagCheckImpl
64  {
65  
/** Shared {@link CheckHTML} instance. */
public static final TagCheck HTML = new CheckHTML();

/** Shared {@link CheckSCRIPT} instance. */
public static final TagCheck SCRIPT = new CheckSCRIPT();

/** Shared {@link CheckTABLE} instance. */
public static final TagCheck TABLE = new CheckTABLE();

/** Shared {@link CheckCaption} instance. */
public static final TagCheck CAPTION = new CheckCaption();

/** Shared {@link CheckIMG} instance. */
public static final TagCheck IMG = new CheckIMG();

/** Shared {@link CheckAREA} instance. */
public static final TagCheck AREA = new CheckAREA();

/** Shared {@link CheckAnchor} instance. */
public static final TagCheck ANCHOR = new CheckAnchor();

/** Shared {@link CheckMap} instance. */
public static final TagCheck MAP = new CheckMap();

/** Shared {@link CheckSTYLE} instance. */
public static final TagCheck STYLE = new CheckSTYLE();

/** Shared {@link CheckTableCell} instance. */
public static final TagCheck TABLECELL = new CheckTableCell();

/** Shared {@link CheckLINK} instance. */
public static final TagCheck LINK = new CheckLINK();

/** Shared {@link CheckHR} instance. */
public static final TagCheck HR = new CheckHR();

/** Shared {@link CheckForm} instance. */
public static final TagCheck FORM = new CheckForm();

/** Shared {@link CheckMeta} instance. */
public static final TagCheck META = new CheckMeta();

/**
 * Private constructor: this class only exposes the constants above and the nested checker classes, so it is
 * not meant to be instantiated.
 */
private TagCheckImpl()
{
    // intentionally empty
}
143 
144     /**
145      * Checker implementation for html tag.
146      */
147     public static class CheckHTML implements TagCheck
148     {
149 
150         /**
151          * xhtml namepace String.
152          */
153         private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
154 
155         /**
156          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
157          */
158         public void check(Lexer lexer, Node node)
159         {
160 
161             AttVal attval;
162             AttVal xmlns;
163 
164             xmlns = node.getAttrByName("xmlns");
165 
166             if (xmlns != null && XHTML_NAMESPACE.equals(xmlns.value))
167             {
168                 lexer.isvoyager = true;
169                 if (!lexer.configuration.htmlOut) // Unless user has specified plain HTML output,
170                 {
171                     lexer.configuration.xHTML = true; // output format will be XHTML.
172                 }
173                 // adjust other config options, just as in Configuration
174                 lexer.configuration.xmlOut = true;
175                 lexer.configuration.upperCaseTags = false;
176                 lexer.configuration.upperCaseAttrs = false;
177             }
178 
179             for (attval = node.attributes; attval != null; attval = attval.next)
180             {
181                 attval.checkAttribute(lexer, node);
182             }
183         }
184 
185     }
186 
187     /**
188      * Checker implementation for script tags.
189      */
190     public static class CheckSCRIPT implements TagCheck
191     {
192 
193         /**
194          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
195          */
196         public void check(Lexer lexer, Node node)
197         {
198             AttVal lang, type;
199 
200             node.checkAttributes(lexer);
201 
202             lang = node.getAttrByName("language");
203             type = node.getAttrByName("type");
204 
205             if (type == null)
206             {
207                 AttVal missingType = new AttVal(null, null, '"', "type", "");
208                 lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE);
209 
210                 // check for javascript
211                 if (lang != null)
212                 {
213                     String str = lang.value;
214                     if ("javascript".equalsIgnoreCase(str) || "jscript".equalsIgnoreCase(str))
215                     {
216                         node.addAttribute("type", "text/javascript");
217                     }
218                     else if ("vbscript".equalsIgnoreCase(str))
219                     {
220                         // per Randy Waki 8/6/01
221                         node.addAttribute("type", "text/vbscript");
222                     }
223                 }
224                 else
225                 {
226                     node.addAttribute("type", "text/javascript");
227                 }
228             }
229         }
230 
231     }
232 
233     /**
234      * Checker implementation for table.
235      */
236     public static class CheckTABLE implements TagCheck
237     {
238 
239         /**
240          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
241          */
242         public void check(Lexer lexer, Node node)
243         {
244             AttVal attval;
245             Attribute attribute;
246             boolean hasSummary = false;
247 
248             for (attval = node.attributes; attval != null; attval = attval.next)
249             {
250                 attribute = attval.checkAttribute(lexer, node);
251 
252                 if (attribute == AttributeTable.attrSummary)
253                 {
254                     hasSummary = true;
255                 }
256             }
257 
258             /* suppress warning for missing summary for HTML 2.0 and HTML 3.2 */
259             if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32)
260             {
261                 lexer.badAccess |= Report.MISSING_SUMMARY;
262 
263                 // summary is not required, should be only an accessibility warning
264                 // AttVal missingSummary = new AttVal(null, null, '"', "summary", "");
265                 // lexer.report.attrError(lexer, node, missingSummary, Report.MISSING_ATTRIBUTE);
266             }
267 
268             /* convert <table border> to <table border="1"> */
269             if (lexer.configuration.xmlOut)
270             {
271                 attval = node.getAttrByName("border");
272                 if (attval != null)
273                 {
274                     if (attval.value == null)
275                     {
276                         attval.value = "1";
277                     }
278                 }
279             }
280 
281             /* <table height="..."> is proprietary */
282             if ((attval = node.getAttrByName("height")) != null)
283             {
284                 lexer.report.attrError(lexer, node, attval, Report.PROPRIETARY_ATTRIBUTE);
285                 lexer.versions &= Dict.VERS_PROPRIETARY;
286             }
287 
288         }
289 
290     }
291 
292     /**
293      * Checker implementation for table caption.
294      */
295     public static class CheckCaption implements TagCheck
296     {
297 
298         /**
299          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
300          */
301         public void check(Lexer lexer, Node node)
302         {
303             AttVal attval;
304             String value = null;
305 
306             node.checkAttributes(lexer);
307 
308             for (attval = node.attributes; attval != null; attval = attval.next)
309             {
310                 if ("align".equalsIgnoreCase(attval.attribute))
311                 {
312                     value = attval.value;
313                     break;
314                 }
315             }
316 
317             if (value != null)
318             {
319                 if ("left".equalsIgnoreCase(value) || "right".equalsIgnoreCase(value))
320                 {
321                     lexer.constrainVersion(Dict.VERS_HTML40_LOOSE);
322                 }
323                 else if ("top".equalsIgnoreCase(value) || "bottom".equalsIgnoreCase(value))
324                 {
325                     lexer.constrainVersion(~(Dict.VERS_HTML20 | Dict.VERS_HTML32));
326                 }
327                 else
328                 {
329                     lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
330                 }
331             }
332         }
333 
334     }
335 
336     /**
337      * Checker implementation for hr.
338      */
339     public static class CheckHR implements TagCheck
340     {
341 
342         /**
343          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
344          */
345         public void check(Lexer lexer, Node node)
346         {
347             AttVal av = node.getAttrByName("src");
348 
349             node.checkAttributes(lexer);
350 
351             if (av != null)
352             {
353                 lexer.report.attrError(lexer, node, av, Report.PROPRIETARY_ATTR_VALUE);
354             }
355         }
356     }
357 
358     /**
359      * Checker implementation for image tags.
360      */
361     public static class CheckIMG implements TagCheck
362     {
363 
364         /**
365          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
366          */
367         public void check(Lexer lexer, Node node)
368         {
369             AttVal attval;
370             Attribute attribute;
371             boolean hasAlt = false;
372             boolean hasSrc = false;
373             boolean hasUseMap = false;
374             boolean hasIsMap = false;
375             boolean hasDataFld = false;
376 
377             for (attval = node.attributes; attval != null; attval = attval.next)
378             {
379                 attribute = attval.checkAttribute(lexer, node);
380 
381                 if (attribute == AttributeTable.attrAlt)
382                 {
383                     hasAlt = true;
384                 }
385                 else if (attribute == AttributeTable.attrSrc)
386                 {
387                     hasSrc = true;
388                 }
389                 else if (attribute == AttributeTable.attrUsemap)
390                 {
391                     hasUseMap = true;
392                 }
393                 else if (attribute == AttributeTable.attrIsmap)
394                 {
395                     hasIsMap = true;
396                 }
397                 else if (attribute == AttributeTable.attrDatafld)
398                 {
399                     hasDataFld = true;
400                 }
401                 else if (attribute == AttributeTable.attrWidth || attribute == AttributeTable.attrHeight)
402                 {
403                     lexer.constrainVersion(~Dict.VERS_HTML20);
404                 }
405             }
406 
407             if (!hasAlt)
408             {
409                 lexer.badAccess |= Report.MISSING_IMAGE_ALT;
410                 AttVal missingAlt = new AttVal(null, null, '"', "alt", "");
411                 lexer.report.attrError(lexer, node, missingAlt, Report.MISSING_ATTRIBUTE);
412                 if (lexer.configuration.altText != null)
413                 {
414                     node.addAttribute("alt", lexer.configuration.altText);
415                 }
416             }
417 
418             if (!hasSrc && !hasDataFld)
419             {
420                 AttVal missingSrc = new AttVal(null, null, '"', "src", "");
421                 lexer.report.attrError(lexer, node, missingSrc, Report.MISSING_ATTRIBUTE);
422             }
423 
424             if (hasIsMap && !hasUseMap)
425             {
426                 AttVal missingIsMap = new AttVal(null, null, '"', "ismap", "");
427                 lexer.report.attrError(lexer, node, missingIsMap, Report.MISSING_IMAGEMAP);
428             }
429         }
430 
431     }
432 
433     /**
434      * Checker implementation for area.
435      */
436     public static class CheckAREA implements TagCheck
437     {
438 
439         /**
440          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
441          */
442         public void check(Lexer lexer, Node node)
443         {
444             AttVal attval;
445             Attribute attribute;
446             boolean hasAlt = false;
447             boolean hasHref = false;
448 
449             for (attval = node.attributes; attval != null; attval = attval.next)
450             {
451                 attribute = attval.checkAttribute(lexer, node);
452 
453                 if (attribute == AttributeTable.attrAlt)
454                 {
455                     hasAlt = true;
456                 }
457                 else if (attribute == AttributeTable.attrHref)
458                 {
459                     hasHref = true;
460                 }
461             }
462 
463             if (!hasAlt)
464             {
465                 lexer.badAccess |= Report.MISSING_LINK_ALT;
466                 AttVal missingAlt = new AttVal(null, null, '"', "alt", "");
467                 lexer.report.attrError(lexer, node, missingAlt, Report.MISSING_ATTRIBUTE);
468             }
469             if (!hasHref)
470             {
471                 AttVal missingHref = new AttVal(null, null, '"', "href", "");
472                 lexer.report.attrError(lexer, node, missingHref, Report.MISSING_ATTRIBUTE);
473             }
474         }
475 
476     }
477 
478     /**
479      * Checker implementation for anchors.
480      */
481     public static class CheckAnchor implements TagCheck
482     {
483 
484         /**
485          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
486          */
487         public void check(Lexer lexer, Node node)
488         {
489             node.checkAttributes(lexer);
490 
491             lexer.fixId(node);
492         }
493     }
494 
495     /**
496      * Checker implementation for image maps.
497      */
498     public static class CheckMap implements TagCheck
499     {
500 
501         /**
502          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
503          */
504         public void check(Lexer lexer, Node node)
505         {
506             node.checkAttributes(lexer);
507 
508             lexer.fixId(node);
509         }
510     }
511 
512     /**
513      * Checker implementation for style tags.
514      */
515     public static class CheckSTYLE implements TagCheck
516     {
517 
518         /**
519          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
520          */
521         public void check(Lexer lexer, Node node)
522         {
523             AttVal type = node.getAttrByName("type");
524 
525             node.checkAttributes(lexer);
526 
527             if (type == null)
528             {
529                 AttVal missingType = new AttVal(null, null, '"', "type", "");
530                 lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE);
531 
532                 node.addAttribute("type", "text/css");
533             }
534         }
535     }
536 
537     /**
538      * Checker implementation for forms. Reports missing action attribute.
539      */
540     public static class CheckForm implements TagCheck
541     {
542 
543         /**
544          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
545          */
546         public void check(Lexer lexer, Node node)
547         {
548             AttVal action = node.getAttrByName("action");
549 
550             node.checkAttributes(lexer);
551 
552             if (action == null)
553             {
554                 AttVal missingAttribute = new AttVal(null, null, '"', "action", "");
555                 lexer.report.attrError(lexer, node, missingAttribute, Report.MISSING_ATTRIBUTE);
556             }
557         }
558     }
559 
560     /**
561      * Checker implementation for meta tags. Reports missing content attribute.
562      */
563     public static class CheckMeta implements TagCheck
564     {
565 
566         /**
567          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
568          */
569         public void check(Lexer lexer, Node node)
570         {
571             AttVal content = node.getAttrByName("content");
572 
573             node.checkAttributes(lexer);
574 
575             if (content == null)
576             {
577                 AttVal missingAttribute = new AttVal(null, null, '"', "content", "");
578                 lexer.report.attrError(lexer, node, missingAttribute, Report.MISSING_ATTRIBUTE);
579             }
580 
581             // name or http-equiv attribute must also be set
582         }
583     }
584 
585     /**
586      * Checker implementation for table cells.
587      */
588     public static class CheckTableCell implements TagCheck
589     {
590 
591         /**
592          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
593          */
594         public void check(Lexer lexer, Node node)
595         {
596             node.checkAttributes(lexer);
597 
598             // HTML4 strict doesn't allow mixed content for elements with %block; as their content model
599 
600             if (node.getAttrByName("width") != null || node.getAttrByName("height") != null)
601             {
602                 lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
603             }
604         }
605     }
606 
607     /**
608      * add missing type attribute when appropriate.
609      */
610     public static class CheckLINK implements TagCheck
611     {
612 
613         /**
614          * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
615          */
616         public void check(Lexer lexer, Node node)
617         {
618             AttVal rel = node.getAttrByName("rel");
619 
620             node.checkAttributes(lexer);
621 
622             if (rel != null && rel.value != null && rel.value.equals("stylesheet"))
623             {
624                 AttVal type = node.getAttrByName("type");
625 
626                 if (type == null)
627                 {
628                     AttVal missingType = new AttVal(null, null, '"', "type", "");
629                     lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE);
630 
631                     node.addAttribute("type", "text/css");
632                 }
633             }
634         }
635     }
636 
637 }