1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 import java.util.Iterator;
57 import java.util.List;
58 import java.util.StringTokenizer;
59
60
61 /**
62 * Property parser instances.
63 * @author Fabrizio Giustina
64 * @version $Revision $ ($Author $)
65 */
66 public final class ParsePropertyImpl
67 {
68
69 /**
70 * configuration parser for int values.
71 */
72 static final ParseProperty INT = new ParseInt();
73
74 /**
75 * configuration parser for boolean values.
76 */
77 static final ParseProperty BOOL = new ParseBoolean();
78
79 /**
80 * configuration parser for inverted boolean values.
81 */
82 static final ParseProperty INVBOOL = new ParseInvBoolean();
83
84 /**
85 * configuration parser for char encoding values.
86 */
87 static final ParseProperty CHAR_ENCODING = new ParseCharEncoding();
88
89 /**
90 * configuration parser for name values.
91 */
92 static final ParseProperty NAME = new ParseName();
93
94 /**
95 * configuration parser for tag names.
96 */
97 static final ParseProperty TAGNAMES = new ParseTagNames();
98
99 /**
100 * configuration parser for doctype property.
101 */
102 static final ParseProperty DOCTYPE = new ParseDocType();
103
104 /**
105 * configuration parser for repetated attribute property.
106 */
107 static final ParseProperty REPEATED_ATTRIBUTES = new ParseRepeatedAttribute();
108
109 /**
110 * configuration parser for String values.
111 */
112 static final ParseProperty STRING = new ParseString();
113
114 /**
115 * configuration parser for indent property.
116 */
117 static final ParseProperty INDENT = new ParseIndent();
118
119 /**
120 * configuration parser for css selectors.
121 */
122 static final ParseProperty CSS1SELECTOR = new ParseCSS1Selector();
123
124 /**
125 * configuration parser for new line bytes.
126 */
127 static final ParseProperty NEWLINE = new ParseNewLine();
128
/**
 * Not meant to be instantiated: this class only exposes static parser instances and nested parser types.
 */
private ParsePropertyImpl()
{
    // intentionally empty
}
136
137 /**
138 * parser for integer values.
139 */
140 static class ParseInt implements ParseProperty
141 {
142
143 /**
144 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
145 */
146 public Object parse(String value, String option, Configuration configuration)
147 {
148 int i = 0;
149 try
150 {
151 i = Integer.parseInt(value);
152 }
153 catch (NumberFormatException e)
154 {
155 configuration.report.badArgument(value, option);
156 i = -1;
157 }
158 return new Integer(i);
159 }
160
161 /**
162 * @see org.w3c.tidy.ParseProperty#getType()
163 */
164 public String getType()
165 {
166 return "Integer";
167 }
168
169 /**
170 * @see org.w3c.tidy.ParseProperty#getOptionValues()
171 */
172 public String getOptionValues()
173 {
174 return "0, 1, 2, ...";
175 }
176
177 /**
178 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
179 */
180 public String getFriendlyName(String option, Object value, Configuration configuration)
181 {
182 return value == null ? "" : value.toString();
183 }
184 }
185
186 /**
187 * parser for boolean values.
188 */
189 static class ParseBoolean implements ParseProperty
190 {
191
192 /**
193 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
194 */
195 public Object parse(String value, String option, Configuration configuration)
196 {
197 Boolean b = Boolean.TRUE;
198 if (value != null && value.length() > 0)
199 {
200 char c = value.charAt(0);
201 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
202 {
203 b = Boolean.TRUE;
204 }
205 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
206 {
207 b = Boolean.FALSE;
208 }
209 else
210 {
211 configuration.report.badArgument(value, option);
212 }
213 }
214 return b;
215 }
216
217 /**
218 * @see org.w3c.tidy.ParseProperty#getType()
219 */
220 public String getType()
221 {
222 return "Boolean";
223 }
224
225 /**
226 * @see org.w3c.tidy.ParseProperty#getOptionValues()
227 */
228 public String getOptionValues()
229 {
230 return "y/n, yes/no, t/f, true/false, 1/0";
231 }
232
233 /**
234 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
235 */
236 public String getFriendlyName(String option, Object value, Configuration configuration)
237 {
238 if (value == null)
239 {
240 return "";
241 }
242
243 return ((Boolean) value).booleanValue() ? "yes" : "no";
244 }
245 }
246
247 /**
248 * parser for boolean values.
249 */
250 static class ParseInvBoolean implements ParseProperty
251 {
252
253 /**
254 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
255 */
256 public Object parse(String value, String option, Configuration configuration)
257 {
258 return (((Boolean) BOOL.parse(value, option, configuration)).booleanValue() ? Boolean.FALSE : Boolean.TRUE);
259 }
260
261 /**
262 * @see org.w3c.tidy.ParseProperty#getType()
263 */
264 public String getType()
265 {
266 return "Boolean";
267 }
268
269 /**
270 * @see org.w3c.tidy.ParseProperty#getOptionValues()
271 */
272 public String getOptionValues()
273 {
274 return "yes, no, true, false";
275 }
276
277 /**
278 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
279 */
280 public String getFriendlyName(String option, Object value, Configuration configuration)
281 {
282 if (value == null)
283 {
284 return "";
285 }
286
287 return ((Boolean) value).booleanValue() ? "no" : "yes";
288 }
289 }
290
291 /**
292 * parse character encoding option. Can be any java encoding name supported by the runtime platform.
293 */
294 static class ParseCharEncoding implements ParseProperty
295 {
296
297 /**
298 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
299 */
300 public Object parse(String value, String option, Configuration configuration)
301 {
302
303 if ("raw".equalsIgnoreCase(value))
304 {
305
306 configuration.rawOut = true;
307 }
308 else if (!TidyUtils.isCharEncodingSupported(value))
309 {
310 configuration.report.badArgument(value, option);
311 }
312 else if ("input-encoding".equalsIgnoreCase(option))
313 {
314 configuration.setInCharEncodingName(value);
315 }
316 else if ("output-encoding".equalsIgnoreCase(option))
317 {
318 configuration.setOutCharEncodingName(value);
319 }
320 else if ("char-encoding".equalsIgnoreCase(option))
321 {
322 configuration.setInCharEncodingName(value);
323 configuration.setOutCharEncodingName(value);
324 }
325
326 return null;
327 }
328
329 /**
330 * @see org.w3c.tidy.ParseProperty#getType()
331 */
332 public String getType()
333 {
334 return "Encoding";
335 }
336
337 /**
338 * @see org.w3c.tidy.ParseProperty#getOptionValues()
339 */
340 public String getOptionValues()
341 {
342
343 return "Any valid java char encoding name";
344 }
345
346 /**
347 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
348 */
349 public String getFriendlyName(String option, Object value, Configuration configuration)
350 {
351 if ("output-encoding".equalsIgnoreCase(option))
352 {
353 return configuration.getOutCharEncodingName();
354 }
355
356
357 return configuration.getInCharEncodingName();
358 }
359 }
360
361 /**
362 * parser for name values (a string excluding whitespace).
363 */
364 static class ParseName implements ParseProperty
365 {
366
367 /**
368 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
369 */
370 public Object parse(String value, String option, Configuration configuration)
371 {
372 StringTokenizer t = new StringTokenizer(value);
373 String rs = null;
374 if (t.countTokens() >= 1)
375 {
376 rs = t.nextToken();
377 }
378 else
379 {
380 configuration.report.badArgument(value, option);
381 }
382 return rs;
383 }
384
385 /**
386 * @see org.w3c.tidy.ParseProperty#getType()
387 */
388 public String getType()
389 {
390 return "Name";
391 }
392
393 /**
394 * @see org.w3c.tidy.ParseProperty#getOptionValues()
395 */
396 public String getOptionValues()
397 {
398 return "-";
399 }
400
401 /**
402 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
403 */
404 public String getFriendlyName(String option, Object value, Configuration configuration)
405 {
406 return value == null ? "" : value.toString();
407 }
408 }
409
410 /**
411 * parser for name values.
412 */
413 static class ParseTagNames implements ParseProperty
414 {
415
416 /**
417 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
418 */
419 public Object parse(String value, String option, Configuration configuration)
420 {
421 short tagType = Dict.TAGTYPE_INLINE;
422
423 if ("new-inline-tags".equals(option))
424 {
425 tagType = Dict.TAGTYPE_INLINE;
426 }
427 else if ("new-blocklevel-tags".equals(option))
428 {
429 tagType = Dict.TAGTYPE_BLOCK;
430 }
431 else if ("new-empty-tags".equals(option))
432 {
433 tagType = Dict.TAGTYPE_EMPTY;
434 }
435 else if ("new-pre-tags".equals(option))
436 {
437 tagType = Dict.TAGTYPE_PRE;
438 }
439
440 StringTokenizer t = new StringTokenizer(value, " \t\n\r,");
441 while (t.hasMoreTokens())
442 {
443 configuration.definedTags |= tagType;
444 configuration.tt.defineTag(tagType, t.nextToken());
445 }
446 return null;
447 }
448
449 /**
450 * @see org.w3c.tidy.ParseProperty#getType()
451 */
452 public String getType()
453 {
454 return "Tag names";
455 }
456
457 /**
458 * @see org.w3c.tidy.ParseProperty#getOptionValues()
459 */
460 public String getOptionValues()
461 {
462 return "tagX, tagY, ...";
463 }
464
465 /**
466 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
467 */
468 public String getFriendlyName(String option, Object value, Configuration configuration)
469 {
470 short tagType;
471 if ("new-inline-tags".equals(option))
472 {
473 tagType = Dict.TAGTYPE_INLINE;
474 }
475 else if ("new-blocklevel-tags".equals(option))
476 {
477 tagType = Dict.TAGTYPE_BLOCK;
478 }
479 else if ("new-empty-tags".equals(option))
480 {
481 tagType = Dict.TAGTYPE_EMPTY;
482 }
483 else if ("new-pre-tags".equals(option))
484 {
485 tagType = Dict.TAGTYPE_PRE;
486 }
487 else
488 {
489 return "";
490 }
491
492 List tagList = configuration.tt.findAllDefinedTag(tagType);
493 if (tagList.isEmpty())
494 {
495 return "";
496 }
497
498 StringBuffer buffer = new StringBuffer();
499 Iterator iterator = tagList.iterator();
500 while (iterator.hasNext())
501 {
502 buffer.append(iterator.next());
503 buffer.append(" ");
504 }
505
506 return buffer.toString();
507 }
508 }
509
510 /**
511 * Parse doctype preference. doctype: <code>omit | auto | strict | loose | [fpi]</code> where the fpi is a string
512 * similar to <code>"-//ACME//DTD HTML 3.14159//EN"</code>.
513 */
514 static class ParseDocType implements ParseProperty
515 {
516
517 /**
518 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
519 */
520 public Object parse(String value, String option, Configuration configuration)
521 {
522 value = value.trim();
523
524
525
526 if (value.startsWith("\""))
527 {
528 configuration.docTypeMode = Configuration.DOCTYPE_USER;
529 return value;
530 }
531
532
533 String word = "";
534 StringTokenizer t = new StringTokenizer(value, " \t\n\r,");
535 if (t.hasMoreTokens())
536 {
537 word = t.nextToken();
538 }
539
540 if ("auto".equalsIgnoreCase(word))
541 {
542 configuration.docTypeMode = Configuration.DOCTYPE_AUTO;
543 }
544 else if ("omit".equalsIgnoreCase(word))
545 {
546 configuration.docTypeMode = Configuration.DOCTYPE_OMIT;
547 }
548 else if ("strict".equalsIgnoreCase(word))
549 {
550 configuration.docTypeMode = Configuration.DOCTYPE_STRICT;
551 }
552 else if ("loose".equalsIgnoreCase(word) || "transitional".equalsIgnoreCase(word))
553 {
554 configuration.docTypeMode = Configuration.DOCTYPE_LOOSE;
555 }
556 else
557 {
558 configuration.report.badArgument(value, option);
559 }
560 return null;
561 }
562
563 /**
564 * @see org.w3c.tidy.ParseProperty#getType()
565 */
566 public String getType()
567 {
568 return "DocType";
569 }
570
571 /**
572 * @see org.w3c.tidy.ParseProperty#getOptionValues()
573 */
574 public String getOptionValues()
575 {
576 return "omit | auto | strict | loose | [fpi]";
577 }
578
579 /**
580 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
581 */
582 public String getFriendlyName(String option, Object value, Configuration configuration)
583 {
584
585 String stringValue;
586
587 switch (configuration.docTypeMode)
588 {
589 case Configuration.DOCTYPE_AUTO :
590 stringValue = "auto";
591 break;
592
593 case Configuration.DOCTYPE_OMIT :
594 stringValue = "omit";
595 break;
596
597 case Configuration.DOCTYPE_STRICT :
598 stringValue = "strict";
599 break;
600
601 case Configuration.DOCTYPE_LOOSE :
602 stringValue = "transitional";
603 break;
604
605 case Configuration.DOCTYPE_USER :
606 stringValue = configuration.docTypeStr;
607 break;
608
609 default :
610 stringValue = "unknown";
611 break;
612 }
613
614 return stringValue;
615 }
616 }
617
618 /**
619 * keep-first or keep-last?
620 */
621 static class ParseRepeatedAttribute implements ParseProperty
622 {
623
624 /**
625 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
626 */
627 public Object parse(String value, String option, Configuration configuration)
628 {
629 int dupAttr;
630
631 if ("keep-first".equalsIgnoreCase(value))
632 {
633 dupAttr = Configuration.KEEP_FIRST;
634 }
635 else if ("keep-last".equalsIgnoreCase(value))
636 {
637 dupAttr = Configuration.KEEP_LAST;
638 }
639 else
640 {
641 configuration.report.badArgument(value, option);
642 dupAttr = -1;
643 }
644 return new Integer(dupAttr);
645 }
646
647 /**
648 * @see org.w3c.tidy.ParseProperty#getType()
649 */
650 public String getType()
651 {
652 return "Enum";
653 }
654
655 /**
656 * @see org.w3c.tidy.ParseProperty#getOptionValues()
657 */
658 public String getOptionValues()
659 {
660 return "keep-first, keep-last";
661 }
662
663 /**
664 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
665 */
666 public String getFriendlyName(String option, Object value, Configuration configuration)
667 {
668 if (value == null)
669 {
670 return "";
671 }
672
673 int intValue = ((Integer) value).intValue();
674 String stringValue;
675
676 switch (intValue)
677 {
678 case Configuration.KEEP_FIRST :
679 stringValue = "keep-first";
680 break;
681
682 case Configuration.KEEP_LAST :
683 stringValue = "keep-last";
684 break;
685
686 default :
687 stringValue = "unknown";
688 break;
689 }
690
691 return stringValue;
692 }
693 }
694
695 /**
696 * Parser for String values.
697 */
698 static class ParseString implements ParseProperty
699 {
700
701 /**
702 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
703 */
704 public Object parse(String value, String option, Configuration configuration)
705 {
706 return value;
707 }
708
709 /**
710 * @see org.w3c.tidy.ParseProperty#getType()
711 */
712 public String getType()
713 {
714 return "String";
715 }
716
717 /**
718 * @see org.w3c.tidy.ParseProperty#getOptionValues()
719 */
720 public String getOptionValues()
721 {
722 return "-";
723 }
724
725 /**
726 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
727 */
728 public String getFriendlyName(String option, Object value, Configuration configuration)
729 {
730 return value == null ? "" : (String) value;
731 }
732 }
733
734 /**
735 * Parser for indent values.
736 */
737 static class ParseIndent implements ParseProperty
738 {
739
740 /**
741 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
742 */
743 public Object parse(String value, String option, Configuration configuration)
744 {
745 boolean b = configuration.indentContent;
746
747 if ("yes".equalsIgnoreCase(value))
748 {
749 b = true;
750 configuration.smartIndent = false;
751 }
752 else if ("true".equalsIgnoreCase(value))
753 {
754 b = true;
755 configuration.smartIndent = false;
756 }
757 else if ("no".equalsIgnoreCase(value))
758 {
759 b = false;
760 configuration.smartIndent = false;
761 }
762 else if ("false".equalsIgnoreCase(value))
763 {
764 b = false;
765 configuration.smartIndent = false;
766 }
767 else if ("auto".equalsIgnoreCase(value))
768 {
769 b = true;
770 configuration.smartIndent = true;
771 }
772 else
773 {
774 configuration.report.badArgument(value, option);
775 }
776 return b ? Boolean.TRUE : Boolean.FALSE;
777 }
778
779 /**
780 * @see org.w3c.tidy.ParseProperty#getType()
781 */
782 public String getType()
783 {
784 return "Indent";
785 }
786
787 /**
788 * @see org.w3c.tidy.ParseProperty#getOptionValues()
789 */
790 public String getOptionValues()
791 {
792 return "auto, y/n, yes/no, t/f, true/false, 1/0";
793 }
794
795 /**
796 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
797 */
798 public String getFriendlyName(String option, Object value, Configuration configuration)
799 {
800 return value == null ? "" : value.toString();
801 }
802 }
803
804 /**
805 * Parser for css selectors.
806 */
807 static class ParseCSS1Selector implements ParseProperty
808 {
809
810 /**
811 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
812 */
813 public Object parse(String value, String option, Configuration configuration)
814 {
815 StringTokenizer t = new StringTokenizer(value);
816 String buf = null;
817 if (t.countTokens() >= 1)
818 {
819 buf = t.nextToken() + "-";
820
821 }
822 else
823 {
824 configuration.report.badArgument(value, option);
825 }
826
827 if (!Lexer.isCSS1Selector(value))
828 {
829 configuration.report.badArgument(value, option);
830 }
831
832 return buf;
833 }
834
835 /**
836 * @see org.w3c.tidy.ParseProperty#getType()
837 */
838 public String getType()
839 {
840 return "Name";
841 }
842
843 /**
844 * @see org.w3c.tidy.ParseProperty#getOptionValues()
845 */
846 public String getOptionValues()
847 {
848 return "CSS1 selector";
849 }
850
851 /**
852 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
853 */
854 public String getFriendlyName(String option, Object value, Configuration configuration)
855 {
856 return value == null ? "" : (String) value;
857 }
858 }
859
860 /**
861 * Parser for newline bytes. Allows lf|crlf|cr.
862 */
863 static class ParseNewLine implements ParseProperty
864 {
865
866 /**
867 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
868 */
869 public Object parse(String value, String option, Configuration configuration)
870 {
871
872 if ("lf".equalsIgnoreCase(value))
873 {
874 configuration.newline = new char[]{'\n'};
875 }
876 else if ("cr".equalsIgnoreCase(value))
877 {
878 configuration.newline = new char[]{'\r'};
879 }
880 else if ("crlf".equalsIgnoreCase(value))
881 {
882 configuration.newline = new char[]{'\r', '\n'};
883 }
884 else
885 {
886 configuration.report.badArgument(value, option);
887 }
888 return null;
889 }
890
891 /**
892 * @see org.w3c.tidy.ParseProperty#getType()
893 */
894 public String getType()
895 {
896 return "Enum";
897 }
898
899 /**
900 * @see org.w3c.tidy.ParseProperty#getOptionValues()
901 */
902 public String getOptionValues()
903 {
904 return "lf, crlf, cr";
905 }
906
907 /**
908 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
909 */
910 public String getFriendlyName(String option, Object value, Configuration configuration)
911 {
912 if (configuration.newline.length == 1)
913 {
914 return (configuration.newline[0] == '\n') ? "lf" : "cr";
915 }
916 return "crlf";
917 }
918 }
919
920 }