1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 /**
57 * Check HTML attributes implementation.
58 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
59 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
60 * @author Fabrizio Giustina
61 * @version $Revision: 779 $ ($Author: fgiust $)
62 */
63 public final class TagCheckImpl
64 {
65
66 /**
67 * CheckHTML instance.
68 */
69 public static final TagCheck HTML = new CheckHTML();
70
71 /**
72 * CheckSCRIPT instance.
73 */
74 public static final TagCheck SCRIPT = new CheckSCRIPT();
75
76 /**
77 * CheckTABLE instance.
78 */
79 public static final TagCheck TABLE = new CheckTABLE();
80
81 /**
82 * CheckCaption instance.
83 */
84 public static final TagCheck CAPTION = new CheckCaption();
85
86 /**
87 * CheckIMG instance.
88 */
89 public static final TagCheck IMG = new CheckIMG();
90
91 /**
92 * CheckAREA instance.
93 */
94 public static final TagCheck AREA = new CheckAREA();
95
96 /**
97 * CheckAnchor instance.
98 */
99 public static final TagCheck ANCHOR = new CheckAnchor();
100
101 /**
102 * CheckMap instance.
103 */
104 public static final TagCheck MAP = new CheckMap();
105
106 /**
107 * CheckSTYLE instance.
108 */
109 public static final TagCheck STYLE = new CheckSTYLE();
110
111 /**
112 * CheckTableCell instance.
113 */
114 public static final TagCheck TABLECELL = new CheckTableCell();
115
116 /**
117 * CheckLINK instance.
118 */
119 public static final TagCheck LINK = new CheckLINK();
120
121 /**
122 * CheckHR instance.
123 */
124 public static final TagCheck HR = new CheckHR();
125
126 /**
127 * CheckForm instance.
128 */
129 public static final TagCheck FORM = new CheckForm();
130
131 /**
132 * CheckMeta instance.
133 */
134 public static final TagCheck META = new CheckMeta();
135
/**
 * Private constructor: this class only exposes static members and is never instantiated.
 */
private TagCheckImpl()
{
    // intentionally empty
}
143
144 /**
145 * Checker implementation for html tag.
146 */
147 public static class CheckHTML implements TagCheck
148 {
149
150 /**
151 * xhtml namepace String.
152 */
153 private static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
154
155 /**
156 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
157 */
158 public void check(Lexer lexer, Node node)
159 {
160
161 AttVal attval;
162 AttVal xmlns;
163
164 xmlns = node.getAttrByName("xmlns");
165
166 if (xmlns != null && XHTML_NAMESPACE.equals(xmlns.value))
167 {
168 lexer.isvoyager = true;
169 if (!lexer.configuration.htmlOut)
170 {
171 lexer.configuration.xHTML = true;
172 }
173
174 lexer.configuration.xmlOut = true;
175 lexer.configuration.upperCaseTags = false;
176 lexer.configuration.upperCaseAttrs = false;
177 }
178
179 for (attval = node.attributes; attval != null; attval = attval.next)
180 {
181 attval.checkAttribute(lexer, node);
182 }
183 }
184
185 }
186
187 /**
188 * Checker implementation for script tags.
189 */
190 public static class CheckSCRIPT implements TagCheck
191 {
192
193 /**
194 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
195 */
196 public void check(Lexer lexer, Node node)
197 {
198 AttVal lang, type;
199
200 node.checkAttributes(lexer);
201
202 lang = node.getAttrByName("language");
203 type = node.getAttrByName("type");
204
205 if (type == null)
206 {
207 AttVal missingType = new AttVal(null, null, '"', "type", "");
208 lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE);
209
210
211 if (lang != null)
212 {
213 String str = lang.value;
214 if ("javascript".equalsIgnoreCase(str) || "jscript".equalsIgnoreCase(str))
215 {
216 node.addAttribute("type", "text/javascript");
217 }
218 else if ("vbscript".equalsIgnoreCase(str))
219 {
220
221 node.addAttribute("type", "text/vbscript");
222 }
223 }
224 else
225 {
226 node.addAttribute("type", "text/javascript");
227 }
228 }
229 }
230
231 }
232
233 /**
234 * Checker implementation for table.
235 */
236 public static class CheckTABLE implements TagCheck
237 {
238
239 /**
240 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
241 */
242 public void check(Lexer lexer, Node node)
243 {
244 AttVal attval;
245 Attribute attribute;
246 boolean hasSummary = false;
247
248 for (attval = node.attributes; attval != null; attval = attval.next)
249 {
250 attribute = attval.checkAttribute(lexer, node);
251
252 if (attribute == AttributeTable.attrSummary)
253 {
254 hasSummary = true;
255 }
256 }
257
258
259 if (!hasSummary && lexer.doctype != Dict.VERS_HTML20 && lexer.doctype != Dict.VERS_HTML32)
260 {
261 lexer.badAccess |= Report.MISSING_SUMMARY;
262
263
264
265
266 }
267
268
269 if (lexer.configuration.xmlOut)
270 {
271 attval = node.getAttrByName("border");
272 if (attval != null)
273 {
274 if (attval.value == null)
275 {
276 attval.value = "1";
277 }
278 }
279 }
280
281
282 if ((attval = node.getAttrByName("height")) != null)
283 {
284 lexer.report.attrError(lexer, node, attval, Report.PROPRIETARY_ATTRIBUTE);
285 lexer.versions &= Dict.VERS_PROPRIETARY;
286 }
287
288 }
289
290 }
291
292 /**
293 * Checker implementation for table caption.
294 */
295 public static class CheckCaption implements TagCheck
296 {
297
298 /**
299 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
300 */
301 public void check(Lexer lexer, Node node)
302 {
303 AttVal attval;
304 String value = null;
305
306 node.checkAttributes(lexer);
307
308 for (attval = node.attributes; attval != null; attval = attval.next)
309 {
310 if ("align".equalsIgnoreCase(attval.attribute))
311 {
312 value = attval.value;
313 break;
314 }
315 }
316
317 if (value != null)
318 {
319 if ("left".equalsIgnoreCase(value) || "right".equalsIgnoreCase(value))
320 {
321 lexer.constrainVersion(Dict.VERS_HTML40_LOOSE);
322 }
323 else if ("top".equalsIgnoreCase(value) || "bottom".equalsIgnoreCase(value))
324 {
325 lexer.constrainVersion(~(Dict.VERS_HTML20 | Dict.VERS_HTML32));
326 }
327 else
328 {
329 lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
330 }
331 }
332 }
333
334 }
335
336 /**
337 * Checker implementation for hr.
338 */
339 public static class CheckHR implements TagCheck
340 {
341
342 /**
343 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
344 */
345 public void check(Lexer lexer, Node node)
346 {
347 AttVal av = node.getAttrByName("src");
348
349 node.checkAttributes(lexer);
350
351 if (av != null)
352 {
353 lexer.report.attrError(lexer, node, av, Report.PROPRIETARY_ATTR_VALUE);
354 }
355 }
356 }
357
358 /**
359 * Checker implementation for image tags.
360 */
361 public static class CheckIMG implements TagCheck
362 {
363
364 /**
365 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
366 */
367 public void check(Lexer lexer, Node node)
368 {
369 AttVal attval;
370 Attribute attribute;
371 boolean hasAlt = false;
372 boolean hasSrc = false;
373 boolean hasUseMap = false;
374 boolean hasIsMap = false;
375 boolean hasDataFld = false;
376
377 for (attval = node.attributes; attval != null; attval = attval.next)
378 {
379 attribute = attval.checkAttribute(lexer, node);
380
381 if (attribute == AttributeTable.attrAlt)
382 {
383 hasAlt = true;
384 }
385 else if (attribute == AttributeTable.attrSrc)
386 {
387 hasSrc = true;
388 }
389 else if (attribute == AttributeTable.attrUsemap)
390 {
391 hasUseMap = true;
392 }
393 else if (attribute == AttributeTable.attrIsmap)
394 {
395 hasIsMap = true;
396 }
397 else if (attribute == AttributeTable.attrDatafld)
398 {
399 hasDataFld = true;
400 }
401 else if (attribute == AttributeTable.attrWidth || attribute == AttributeTable.attrHeight)
402 {
403 lexer.constrainVersion(~Dict.VERS_HTML20);
404 }
405 }
406
407 if (!hasAlt)
408 {
409 lexer.badAccess |= Report.MISSING_IMAGE_ALT;
410 AttVal missingAlt = new AttVal(null, null, '"', "alt", "");
411 lexer.report.attrError(lexer, node, missingAlt, Report.MISSING_ATTRIBUTE);
412 if (lexer.configuration.altText != null)
413 {
414 node.addAttribute("alt", lexer.configuration.altText);
415 }
416 }
417
418 if (!hasSrc && !hasDataFld)
419 {
420 AttVal missingSrc = new AttVal(null, null, '"', "src", "");
421 lexer.report.attrError(lexer, node, missingSrc, Report.MISSING_ATTRIBUTE);
422 }
423
424 if (hasIsMap && !hasUseMap)
425 {
426 AttVal missingIsMap = new AttVal(null, null, '"', "ismap", "");
427 lexer.report.attrError(lexer, node, missingIsMap, Report.MISSING_IMAGEMAP);
428 }
429 }
430
431 }
432
433 /**
434 * Checker implementation for area.
435 */
436 public static class CheckAREA implements TagCheck
437 {
438
439 /**
440 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
441 */
442 public void check(Lexer lexer, Node node)
443 {
444 AttVal attval;
445 Attribute attribute;
446 boolean hasAlt = false;
447 boolean hasHref = false;
448
449 for (attval = node.attributes; attval != null; attval = attval.next)
450 {
451 attribute = attval.checkAttribute(lexer, node);
452
453 if (attribute == AttributeTable.attrAlt)
454 {
455 hasAlt = true;
456 }
457 else if (attribute == AttributeTable.attrHref)
458 {
459 hasHref = true;
460 }
461 }
462
463 if (!hasAlt)
464 {
465 lexer.badAccess |= Report.MISSING_LINK_ALT;
466 AttVal missingAlt = new AttVal(null, null, '"', "alt", "");
467 lexer.report.attrError(lexer, node, missingAlt, Report.MISSING_ATTRIBUTE);
468 }
469 if (!hasHref)
470 {
471 AttVal missingHref = new AttVal(null, null, '"', "href", "");
472 lexer.report.attrError(lexer, node, missingHref, Report.MISSING_ATTRIBUTE);
473 }
474 }
475
476 }
477
478 /**
479 * Checker implementation for anchors.
480 */
481 public static class CheckAnchor implements TagCheck
482 {
483
484 /**
485 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
486 */
487 public void check(Lexer lexer, Node node)
488 {
489 node.checkAttributes(lexer);
490
491 lexer.fixId(node);
492 }
493 }
494
495 /**
496 * Checker implementation for image maps.
497 */
498 public static class CheckMap implements TagCheck
499 {
500
501 /**
502 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
503 */
504 public void check(Lexer lexer, Node node)
505 {
506 node.checkAttributes(lexer);
507
508 lexer.fixId(node);
509 }
510 }
511
512 /**
513 * Checker implementation for style tags.
514 */
515 public static class CheckSTYLE implements TagCheck
516 {
517
518 /**
519 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
520 */
521 public void check(Lexer lexer, Node node)
522 {
523 AttVal type = node.getAttrByName("type");
524
525 node.checkAttributes(lexer);
526
527 if (type == null)
528 {
529 AttVal missingType = new AttVal(null, null, '"', "type", "");
530 lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE);
531
532 node.addAttribute("type", "text/css");
533 }
534 }
535 }
536
537 /**
538 * Checker implementation for forms. Reports missing action attribute.
539 */
540 public static class CheckForm implements TagCheck
541 {
542
543 /**
544 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
545 */
546 public void check(Lexer lexer, Node node)
547 {
548 AttVal action = node.getAttrByName("action");
549
550 node.checkAttributes(lexer);
551
552 if (action == null)
553 {
554 AttVal missingAttribute = new AttVal(null, null, '"', "action", "");
555 lexer.report.attrError(lexer, node, missingAttribute, Report.MISSING_ATTRIBUTE);
556 }
557 }
558 }
559
560 /**
561 * Checker implementation for meta tags. Reports missing content attribute.
562 */
563 public static class CheckMeta implements TagCheck
564 {
565
566 /**
567 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
568 */
569 public void check(Lexer lexer, Node node)
570 {
571 AttVal content = node.getAttrByName("content");
572
573 node.checkAttributes(lexer);
574
575 if (content == null)
576 {
577 AttVal missingAttribute = new AttVal(null, null, '"', "content", "");
578 lexer.report.attrError(lexer, node, missingAttribute, Report.MISSING_ATTRIBUTE);
579 }
580
581
582 }
583 }
584
585 /**
586 * Checker implementation for table cells.
587 */
588 public static class CheckTableCell implements TagCheck
589 {
590
591 /**
592 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
593 */
594 public void check(Lexer lexer, Node node)
595 {
596 node.checkAttributes(lexer);
597
598
599
600 if (node.getAttrByName("width") != null || node.getAttrByName("height") != null)
601 {
602 lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
603 }
604 }
605 }
606
607 /**
608 * add missing type attribute when appropriate.
609 */
610 public static class CheckLINK implements TagCheck
611 {
612
613 /**
614 * @see org.w3c.tidy.TagCheck#check(org.w3c.tidy.Lexer, org.w3c.tidy.Node)
615 */
616 public void check(Lexer lexer, Node node)
617 {
618 AttVal rel = node.getAttrByName("rel");
619
620 node.checkAttributes(lexer);
621
622 if (rel != null && rel.value != null && rel.value.equals("stylesheet"))
623 {
624 AttVal type = node.getAttrByName("type");
625
626 if (type == null)
627 {
628 AttVal missingType = new AttVal(null, null, '"', "type", "");
629 lexer.report.attrError(lexer, node, missingType, Report.MISSING_ATTRIBUTE);
630
631 node.addAttribute("type", "text/css");
632 }
633 }
634 }
635 }
636
637 }