1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54 package org.w3c.tidy;
55
56 import java.util.Iterator;
57 import java.util.List;
58 import java.util.StringTokenizer;
59
60
61 /***
62 * Property parser instances.
63 * @author Fabrizio Giustina
64 * @version $Revision $ ($Author $)
65 */
66 public final class ParsePropertyImpl
67 {
68
/**
 * Configuration parser for int values (shared {@link ParseInt} instance).
 */
static final ParseProperty INT = new ParseInt();

/**
 * Configuration parser for boolean values (shared {@link ParseBoolean} instance).
 */
static final ParseProperty BOOL = new ParseBoolean();

/**
 * Configuration parser for inverted boolean values (shared {@link ParseInvBoolean} instance).
 */
static final ParseProperty INVBOOL = new ParseInvBoolean();

/**
 * Configuration parser for char encoding values (shared {@link ParseCharEncoding} instance).
 */
static final ParseProperty CHAR_ENCODING = new ParseCharEncoding();

/**
 * Configuration parser for name values (shared {@link ParseName} instance).
 */
static final ParseProperty NAME = new ParseName();

/**
 * Configuration parser for tag names (shared {@link ParseTagNames} instance).
 */
static final ParseProperty TAGNAMES = new ParseTagNames();

/**
 * Configuration parser for the doctype property (shared {@link ParseDocType} instance).
 */
static final ParseProperty DOCTYPE = new ParseDocType();

/**
 * Configuration parser for the repeated attribute property (shared {@link ParseRepeatedAttribute} instance).
 */
static final ParseProperty REPEATED_ATTRIBUTES = new ParseRepeatedAttribute();

/**
 * Configuration parser for String values (shared {@link ParseString} instance).
 */
static final ParseProperty STRING = new ParseString();

/**
 * Configuration parser for the indent property (shared {@link ParseIndent} instance).
 */
static final ParseProperty INDENT = new ParseIndent();

/**
 * Configuration parser for css selectors (shared {@link ParseCSS1Selector} instance).
 */
static final ParseProperty CSS1SELECTOR = new ParseCSS1Selector();

/**
 * Configuration parser for new line bytes (shared {@link ParseNewLine} instance).
 */
static final ParseProperty NEWLINE = new ParseNewLine();

/**
 * Don't instantiate: this class only exposes the parser constants above and their nested implementations.
 */
private ParsePropertyImpl()
{
    // intentionally empty; private so that no instance can be created from outside this class
}
136
137 /***
138 * parser for integer values.
139 */
140 static class ParseInt implements ParseProperty
141 {
142
143 /***
144 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
145 */
146 public Object parse(String value, String option, Configuration configuration)
147 {
148 int i = 0;
149 try
150 {
151 i = Integer.parseInt(value);
152 }
153 catch (NumberFormatException e)
154 {
155 configuration.report.badArgument(value, option);
156 i = -1;
157 }
158 return new Integer(i);
159 }
160
161 /***
162 * @see org.w3c.tidy.ParseProperty#getType()
163 */
164 public String getType()
165 {
166 return "Integer";
167 }
168
169 /***
170 * @see org.w3c.tidy.ParseProperty#getOptionValues()
171 */
172 public String getOptionValues()
173 {
174 return "0, 1, 2, ...";
175 }
176
177 /***
178 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
179 */
180 public String getFriendlyName(String option, Object value, Configuration configuration)
181 {
182 return value == null ? "" : value.toString();
183 }
184 }
185
186 /***
187 * parser for boolean values.
188 */
189 static class ParseBoolean implements ParseProperty
190 {
191
192 /***
193 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
194 */
195 public Object parse(String value, String option, Configuration configuration)
196 {
197 Boolean b = Boolean.TRUE;
198 if (value != null && value.length() > 0)
199 {
200 char c = value.charAt(0);
201 if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1'))
202 {
203 b = Boolean.TRUE;
204 }
205 else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0'))
206 {
207 b = Boolean.FALSE;
208 }
209 else
210 {
211 configuration.report.badArgument(value, option);
212 }
213 }
214 return b;
215 }
216
217 /***
218 * @see org.w3c.tidy.ParseProperty#getType()
219 */
220 public String getType()
221 {
222 return "Boolean";
223 }
224
225 /***
226 * @see org.w3c.tidy.ParseProperty#getOptionValues()
227 */
228 public String getOptionValues()
229 {
230 return "y/n, yes/no, t/f, true/false, 1/0";
231 }
232
233 /***
234 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
235 */
236 public String getFriendlyName(String option, Object value, Configuration configuration)
237 {
238 if (value == null)
239 {
240 return "";
241 }
242
243 return ((Boolean) value).booleanValue() ? "yes" : "no";
244 }
245 }
246
247 /***
248 * parser for boolean values.
249 */
250 static class ParseInvBoolean implements ParseProperty
251 {
252
253 /***
254 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
255 */
256 public Object parse(String value, String option, Configuration configuration)
257 {
258 return (((Boolean) BOOL.parse(value, option, configuration)).booleanValue() ? Boolean.FALSE : Boolean.TRUE);
259 }
260
261 /***
262 * @see org.w3c.tidy.ParseProperty#getType()
263 */
264 public String getType()
265 {
266 return "Boolean";
267 }
268
269 /***
270 * @see org.w3c.tidy.ParseProperty#getOptionValues()
271 */
272 public String getOptionValues()
273 {
274 return "yes, no, true, false";
275 }
276
277 /***
278 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
279 */
280 public String getFriendlyName(String option, Object value, Configuration configuration)
281 {
282 if (value == null)
283 {
284 return "";
285 }
286
287 return ((Boolean) value).booleanValue() ? "no" : "yes";
288 }
289 }
290
291 /***
292 * parse character encoding option. Can be RAW, ASCII, LATIN1, UTF8, ISO2022, MACROMAN, UTF16LE, UTF16BE, UTF16,
293 * WIN1252, BIG5, SHIFTJIS
294 */
295 static class ParseCharEncoding implements ParseProperty
296 {
297
298 /***
299 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
300 */
301 public Object parse(String value, String option, Configuration configuration)
302 {
303
304 if ("raw".equalsIgnoreCase(value))
305 {
306
307 configuration.rawOut = true;
308 }
309 else if (!TidyUtils.isCharEncodingSupported(value))
310 {
311 configuration.report.badArgument(value, option);
312 }
313 else if ("input-encoding".equalsIgnoreCase(option))
314 {
315 configuration.setInCharEncodingName(value);
316 }
317 else if ("output-encoding".equalsIgnoreCase(option))
318 {
319 configuration.setOutCharEncodingName(value);
320 }
321 else if ("char-encoding".equalsIgnoreCase(option))
322 {
323 configuration.setInCharEncodingName(value);
324 configuration.setOutCharEncodingName(value);
325 }
326
327 return null;
328 }
329
330 /***
331 * @see org.w3c.tidy.ParseProperty#getType()
332 */
333 public String getType()
334 {
335 return "Encoding";
336 }
337
338 /***
339 * @see org.w3c.tidy.ParseProperty#getOptionValues()
340 */
341 public String getOptionValues()
342 {
343
344 return "Any valid java char encoding name";
345 }
346
347 /***
348 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
349 */
350 public String getFriendlyName(String option, Object value, Configuration configuration)
351 {
352 if ("output-encoding".equalsIgnoreCase(option))
353 {
354 return configuration.getOutCharEncodingName();
355 }
356
357
358 return configuration.getInCharEncodingName();
359 }
360 }
361
362 /***
363 * parser for name values (a string excluding whitespace).
364 */
365 static class ParseName implements ParseProperty
366 {
367
368 /***
369 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
370 */
371 public Object parse(String value, String option, Configuration configuration)
372 {
373 StringTokenizer t = new StringTokenizer(value);
374 String rs = null;
375 if (t.countTokens() >= 1)
376 {
377 rs = t.nextToken();
378 }
379 else
380 {
381 configuration.report.badArgument(value, option);
382 }
383 return rs;
384 }
385
386 /***
387 * @see org.w3c.tidy.ParseProperty#getType()
388 */
389 public String getType()
390 {
391 return "Name";
392 }
393
394 /***
395 * @see org.w3c.tidy.ParseProperty#getOptionValues()
396 */
397 public String getOptionValues()
398 {
399 return "-";
400 }
401
402 /***
403 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
404 */
405 public String getFriendlyName(String option, Object value, Configuration configuration)
406 {
407 return value == null ? "" : value.toString();
408 }
409 }
410
411 /***
412 * parser for name values.
413 */
414 static class ParseTagNames implements ParseProperty
415 {
416
417 /***
418 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
419 */
420 public Object parse(String value, String option, Configuration configuration)
421 {
422 short tagType = Dict.TAGTYPE_INLINE;
423
424 if ("new-inline-tags".equals(option))
425 {
426 tagType = Dict.TAGTYPE_INLINE;
427 }
428 else if ("new-blocklevel-tags".equals(option))
429 {
430 tagType = Dict.TAGTYPE_BLOCK;
431 }
432 else if ("new-empty-tags".equals(option))
433 {
434 tagType = Dict.TAGTYPE_EMPTY;
435 }
436 else if ("new-pre-tags".equals(option))
437 {
438 tagType = Dict.TAGTYPE_PRE;
439 }
440
441 StringTokenizer t = new StringTokenizer(value, " \t\n\r,");
442 while (t.hasMoreTokens())
443 {
444 configuration.definedTags |= tagType;
445 configuration.tt.defineTag(tagType, t.nextToken());
446 }
447 return null;
448 }
449
450 /***
451 * @see org.w3c.tidy.ParseProperty#getType()
452 */
453 public String getType()
454 {
455 return "Tag names";
456 }
457
458 /***
459 * @see org.w3c.tidy.ParseProperty#getOptionValues()
460 */
461 public String getOptionValues()
462 {
463 return "tagX, tagY, ...";
464 }
465
466 /***
467 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
468 */
469 public String getFriendlyName(String option, Object value, Configuration configuration)
470 {
471 short tagType;
472 if ("new-inline-tags".equals(option))
473 {
474 tagType = Dict.TAGTYPE_INLINE;
475 }
476 else if ("new-blocklevel-tags".equals(option))
477 {
478 tagType = Dict.TAGTYPE_BLOCK;
479 }
480 else if ("new-empty-tags".equals(option))
481 {
482 tagType = Dict.TAGTYPE_EMPTY;
483 }
484 else if ("new-pre-tags".equals(option))
485 {
486 tagType = Dict.TAGTYPE_PRE;
487 }
488 else
489 {
490 return "";
491 }
492
493 List tagList = configuration.tt.findAllDefinedTag(tagType);
494 if (tagList.isEmpty())
495 {
496 return "";
497 }
498
499 StringBuffer buffer = new StringBuffer();
500 Iterator iterator = tagList.iterator();
501 while (iterator.hasNext())
502 {
503 buffer.append(iterator.next());
504 buffer.append(" ");
505 }
506
507 return buffer.toString();
508 }
509 }
510
511 /***
512 * Parse doctype preference. doctype: <code>omit | auto | strict | loose | [fpi]</code> where the fpi is a string
513 * similar to <code>"-//ACME//DTD HTML 3.14159//EN"</code>.
514 */
515 static class ParseDocType implements ParseProperty
516 {
517
518 /***
519 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
520 */
521 public Object parse(String value, String option, Configuration configuration)
522 {
523 value = value.trim();
524
525
526
527 if (value.startsWith("\""))
528 {
529 configuration.docTypeMode = Configuration.DOCTYPE_USER;
530 return value;
531 }
532
533
534 String word = "";
535 StringTokenizer t = new StringTokenizer(value, " \t\n\r,");
536 if (t.hasMoreTokens())
537 {
538 word = t.nextToken();
539 }
540
541 if ("auto".equalsIgnoreCase(word))
542 {
543 configuration.docTypeMode = Configuration.DOCTYPE_AUTO;
544 }
545 else if ("omit".equalsIgnoreCase(word))
546 {
547 configuration.docTypeMode = Configuration.DOCTYPE_OMIT;
548 }
549 else if ("strict".equalsIgnoreCase(word))
550 {
551 configuration.docTypeMode = Configuration.DOCTYPE_STRICT;
552 }
553 else if ("loose".equalsIgnoreCase(word) || "transitional".equalsIgnoreCase(word))
554 {
555 configuration.docTypeMode = Configuration.DOCTYPE_LOOSE;
556 }
557 else
558 {
559 configuration.report.badArgument(value, option);
560 }
561 return null;
562 }
563
564 /***
565 * @see org.w3c.tidy.ParseProperty#getType()
566 */
567 public String getType()
568 {
569 return "DocType";
570 }
571
572 /***
573 * @see org.w3c.tidy.ParseProperty#getOptionValues()
574 */
575 public String getOptionValues()
576 {
577 return "omit | auto | strict | loose | [fpi]";
578 }
579
580 /***
581 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
582 */
583 public String getFriendlyName(String option, Object value, Configuration configuration)
584 {
585
586 String stringValue;
587
588 switch (configuration.docTypeMode)
589 {
590 case Configuration.DOCTYPE_AUTO :
591 stringValue = "auto";
592 break;
593
594 case Configuration.DOCTYPE_OMIT :
595 stringValue = "omit";
596 break;
597
598 case Configuration.DOCTYPE_STRICT :
599 stringValue = "strict";
600 break;
601
602 case Configuration.DOCTYPE_LOOSE :
603 stringValue = "transitional";
604 break;
605
606 case Configuration.DOCTYPE_USER :
607 stringValue = configuration.docTypeStr;
608 break;
609
610 default :
611 stringValue = "unknown";
612 break;
613 }
614
615 return stringValue;
616 }
617 }
618
619 /***
620 * keep-first or keep-last?
621 */
622 static class ParseRepeatedAttribute implements ParseProperty
623 {
624
625 /***
626 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
627 */
628 public Object parse(String value, String option, Configuration configuration)
629 {
630 int dupAttr;
631
632 if ("keep-first".equalsIgnoreCase(value))
633 {
634 dupAttr = Configuration.KEEP_FIRST;
635 }
636 else if ("keep-last".equalsIgnoreCase(value))
637 {
638 dupAttr = Configuration.KEEP_LAST;
639 }
640 else
641 {
642 configuration.report.badArgument(value, option);
643 dupAttr = -1;
644 }
645 return new Integer(dupAttr);
646 }
647
648 /***
649 * @see org.w3c.tidy.ParseProperty#getType()
650 */
651 public String getType()
652 {
653 return "Enum";
654 }
655
656 /***
657 * @see org.w3c.tidy.ParseProperty#getOptionValues()
658 */
659 public String getOptionValues()
660 {
661 return "keep-first, keep-last";
662 }
663
664 /***
665 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
666 */
667 public String getFriendlyName(String option, Object value, Configuration configuration)
668 {
669 if (value == null)
670 {
671 return "";
672 }
673
674 int intValue = ((Integer) value).intValue();
675 String stringValue;
676
677 switch (intValue)
678 {
679 case Configuration.KEEP_FIRST :
680 stringValue = "keep-first";
681 break;
682
683 case Configuration.KEEP_LAST :
684 stringValue = "keep-last";
685 break;
686
687 default :
688 stringValue = "unknown";
689 break;
690 }
691
692 return stringValue;
693 }
694 }
695
696 /***
697 * Parser for String values.
698 */
699 static class ParseString implements ParseProperty
700 {
701
702 /***
703 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
704 */
705 public Object parse(String value, String option, Configuration configuration)
706 {
707 return value;
708 }
709
710 /***
711 * @see org.w3c.tidy.ParseProperty#getType()
712 */
713 public String getType()
714 {
715 return "String";
716 }
717
718 /***
719 * @see org.w3c.tidy.ParseProperty#getOptionValues()
720 */
721 public String getOptionValues()
722 {
723 return "-";
724 }
725
726 /***
727 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
728 */
729 public String getFriendlyName(String option, Object value, Configuration configuration)
730 {
731 return value == null ? "" : (String) value;
732 }
733 }
734
735 /***
736 * Parser for indent values.
737 */
738 static class ParseIndent implements ParseProperty
739 {
740
741 /***
742 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
743 */
744 public Object parse(String value, String option, Configuration configuration)
745 {
746 boolean b = configuration.indentContent;
747
748 if ("yes".equalsIgnoreCase(value))
749 {
750 b = true;
751 configuration.smartIndent = false;
752 }
753 else if ("true".equalsIgnoreCase(value))
754 {
755 b = true;
756 configuration.smartIndent = false;
757 }
758 else if ("no".equalsIgnoreCase(value))
759 {
760 b = false;
761 configuration.smartIndent = false;
762 }
763 else if ("false".equalsIgnoreCase(value))
764 {
765 b = false;
766 configuration.smartIndent = false;
767 }
768 else if ("auto".equalsIgnoreCase(value))
769 {
770 b = true;
771 configuration.smartIndent = true;
772 }
773 else
774 {
775 configuration.report.badArgument(value, option);
776 }
777 return b ? Boolean.TRUE : Boolean.FALSE;
778 }
779
780 /***
781 * @see org.w3c.tidy.ParseProperty#getType()
782 */
783 public String getType()
784 {
785 return "Indent";
786 }
787
788 /***
789 * @see org.w3c.tidy.ParseProperty#getOptionValues()
790 */
791 public String getOptionValues()
792 {
793 return "auto, y/n, yes/no, t/f, true/false, 1/0";
794 }
795
796 /***
797 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
798 */
799 public String getFriendlyName(String option, Object value, Configuration configuration)
800 {
801 return value == null ? "" : value.toString();
802 }
803 }
804
805 /***
806 * Parser for css selectors.
807 */
808 static class ParseCSS1Selector implements ParseProperty
809 {
810
811 /***
812 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
813 */
814 public Object parse(String value, String option, Configuration configuration)
815 {
816 StringTokenizer t = new StringTokenizer(value);
817 String buf = null;
818 if (t.countTokens() >= 1)
819 {
820 buf = t.nextToken() + "-";
821
822 }
823 else
824 {
825 configuration.report.badArgument(value, option);
826 }
827
828 if (!Lexer.isCSS1Selector(value))
829 {
830 configuration.report.badArgument(value, option);
831 }
832
833 return buf;
834 }
835
836 /***
837 * @see org.w3c.tidy.ParseProperty#getType()
838 */
839 public String getType()
840 {
841 return "Name";
842 }
843
844 /***
845 * @see org.w3c.tidy.ParseProperty#getOptionValues()
846 */
847 public String getOptionValues()
848 {
849 return "CSS1 selector";
850 }
851
852 /***
853 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
854 */
855 public String getFriendlyName(String option, Object value, Configuration configuration)
856 {
857 return value == null ? "" : (String) value;
858 }
859 }
860
861 /***
862 * Parser for newline bytes. Allows lf|crlf|cr.
863 */
864 static class ParseNewLine implements ParseProperty
865 {
866
867 /***
868 * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
869 */
870 public Object parse(String value, String option, Configuration configuration)
871 {
872
873 if ("lf".equalsIgnoreCase(value))
874 {
875 configuration.newline = new char[]{'\n'};
876 }
877 else if ("cr".equalsIgnoreCase(value))
878 {
879 configuration.newline = new char[]{'\r'};
880 }
881 else if ("crlf".equalsIgnoreCase(value))
882 {
883 configuration.newline = new char[]{'\r', '\n'};
884 }
885 else
886 {
887 configuration.report.badArgument(value, option);
888 }
889 return null;
890 }
891
892 /***
893 * @see org.w3c.tidy.ParseProperty#getType()
894 */
895 public String getType()
896 {
897 return "Enum";
898 }
899
900 /***
901 * @see org.w3c.tidy.ParseProperty#getOptionValues()
902 */
903 public String getOptionValues()
904 {
905 return "lf, crlf, cr";
906 }
907
908 /***
909 * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
910 */
911 public String getFriendlyName(String option, Object value, Configuration configuration)
912 {
913 if (configuration.newline.length == 1)
914 {
915 return (configuration.newline[0] == '\n') ? "lf" : "cr";
916 }
917 return "crlf";
918 }
919 }
920
921 }