View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  import java.io.FileInputStream;
57  import java.io.IOException;
58  import java.io.Serializable;
59  import java.io.Writer;
60  import java.lang.reflect.Field;
61  import java.util.ArrayList;
62  import java.util.Collections;
63  import java.util.Enumeration;
64  import java.util.HashMap;
65  import java.util.Iterator;
66  import java.util.List;
67  import java.util.Map;
68  import java.util.Properties;
69  
70  
71  /***
72   * Read configuration file and manage configuration properties. Configuration files associate a property name with a
73   * value. The format is that of a Java .properties file.
74   * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
75   * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
76   * @author Fabrizio Giustina
77   * @version $Revision: 1.43 $ ($Author: fgiust $)
78   */
79  public class Configuration implements Serializable
80  {
81  
82      /***
83       * character encoding = RAW. Index of the "raw" entry of <code>ENCODING_NAMES</code> (no java encoding).
84       * @deprecated use <code>Tidy.setRawOut(true)</code> for raw output
85       */
86      public static final int RAW = 0;
87  
88      /***
89       * character encoding = ASCII. Index of the "ASCII" entry of <code>ENCODING_NAMES</code>.
90       * @deprecated
91       */
92      public static final int ASCII = 1;
93  
94      /***
95       * character encoding = LATIN1. Index of the "ISO8859_1" entry of <code>ENCODING_NAMES</code>.
96       * @deprecated
97       */
98      public static final int LATIN1 = 2;
99  
100     /***
101      * character encoding = UTF8. Index of the "UTF8" entry of <code>ENCODING_NAMES</code>.
102      * @deprecated
103      */
104     public static final int UTF8 = 3;
105 
106     /***
107      * character encoding = ISO2022. Index of the "JIS" entry of <code>ENCODING_NAMES</code>.
108      * @deprecated
109      */
110     public static final int ISO2022 = 4;
111 
112     /***
113      * character encoding = MACROMAN. Index of the "MacRoman" entry of <code>ENCODING_NAMES</code>.
114      * @deprecated
115      */
116     public static final int MACROMAN = 5;
117 
118     /***
119      * character encoding = UTF16LE. Index of the "UnicodeLittle" entry of <code>ENCODING_NAMES</code>.
120      * @deprecated
121      */
122     public static final int UTF16LE = 6;
123 
124     /***
125      * character encoding = UTF16BE. Index of the "UnicodeBig" entry of <code>ENCODING_NAMES</code>.
126      * @deprecated
127      */
128     public static final int UTF16BE = 7;
129 
130     /***
131      * character encoding = UTF16. Index of the "Unicode" entry of <code>ENCODING_NAMES</code>.
132      * @deprecated
133      */
134     public static final int UTF16 = 8;
135 
136     /***
137      * character encoding = WIN1252. Index of the "Cp1252" entry of <code>ENCODING_NAMES</code>.
138      * @deprecated
139      */
140     public static final int WIN1252 = 9;
141 
142     /***
143      * character encoding = BIG5. Index of the "Big5" entry of <code>ENCODING_NAMES</code>.
144      * @deprecated
145      */
146     public static final int BIG5 = 10;
147 
148     /***
149      * character encoding = SHIFTJIS. Index of the "SJIS" entry of <code>ENCODING_NAMES</code>.
150      * @deprecated
151      */
152     public static final int SHIFTJIS = 11;
153 
154     /***
155      * Convert from deprecated tidy encoding constant to standard java encoding name.
156      */
157     private final String[] ENCODING_NAMES = new String[]{
158         "raw", // rawOut, it will not be mapped to a java encoding
159         "ASCII",
160         "ISO8859_1",
161         "UTF8",
162         "JIS",
163         "MacRoman",
164         "UnicodeLittle",
165         "UnicodeBig",
166         "Unicode",
167         "Cp1252",
168         "Big5",
169         "SJIS"};
170 
171     /***
172      * treatment of doctype: omit. One of the <code>DOCTYPE_*</code> values; <code>docTypeMode</code> defaults to AUTO.
173      * @todo should be an enumeration DocTypeMode
174      */
175     public static final int DOCTYPE_OMIT = 0;
176 
177     /***
178      * treatment of doctype: auto. This is the initial value of <code>docTypeMode</code>.
179      */
180     public static final int DOCTYPE_AUTO = 1;
181 
182     /***
183      * treatment of doctype: strict.
184      */
185     public static final int DOCTYPE_STRICT = 2;
186 
187     /***
188      * treatment of doctype: loose.
189      */
190     public static final int DOCTYPE_LOOSE = 3;
191 
192     /***
193      * treatment of doctype: user.
194      */
195     public static final int DOCTYPE_USER = 4;
196 
197     /***
198      * Keep last duplicate attribute. This is the initial value of <code>duplicateAttrs</code>.
199      * @todo should be an enumeration DupAttrMode
200      */
201     public static final int KEEP_LAST = 0;
202 
203     /***
204      * Keep first duplicate attribute.
205      */
206     public static final int KEEP_FIRST = 1;
207 
208     /***
209      * Map containing all the valid configuration options and the related parser. Each entry maps a String (option
210      * name) to its Flag instance. It is filled by the static initializer below through addConfigOption.
211      */
212     private static final Map OPTIONS = new HashMap();
213 
214     /***
215      * serial version UID for this class.
216      */
217     private static final long serialVersionUID = -4955155037138560842L;
218 
219     static
220     {
221         addConfigOption(new Flag("indent-spaces", "spaces", ParsePropertyImpl.INT));
222         addConfigOption(new Flag("wrap", "wraplen", ParsePropertyImpl.INT));
223         addConfigOption(new Flag("show-errors", "showErrors", ParsePropertyImpl.INT));
224         addConfigOption(new Flag("tab-size", "tabsize", ParsePropertyImpl.INT));
225 
226         addConfigOption(new Flag("wrap-attributes", "wrapAttVals", ParsePropertyImpl.BOOL));
227         addConfigOption(new Flag("wrap-script-literals", "wrapScriptlets", ParsePropertyImpl.BOOL));
228         addConfigOption(new Flag("wrap-sections", "wrapSection", ParsePropertyImpl.BOOL));
229         addConfigOption(new Flag("wrap-asp", "wrapAsp", ParsePropertyImpl.BOOL));
230         addConfigOption(new Flag("wrap-jste", "wrapJste", ParsePropertyImpl.BOOL));
231         addConfigOption(new Flag("wrap-php", "wrapPhp", ParsePropertyImpl.BOOL));
232         addConfigOption(new Flag("literal-attributes", "literalAttribs", ParsePropertyImpl.BOOL));
233         addConfigOption(new Flag("show-body-only", "bodyOnly", ParsePropertyImpl.BOOL));
234         addConfigOption(new Flag("fix-uri", "fixUri", ParsePropertyImpl.BOOL));
235         addConfigOption(new Flag("lower-literals", "lowerLiterals", ParsePropertyImpl.BOOL));
236         addConfigOption(new Flag("hide-comments", "hideComments", ParsePropertyImpl.BOOL));
237         addConfigOption(new Flag("indent-cdata", "indentCdata", ParsePropertyImpl.BOOL));
238         addConfigOption(new Flag("force-output", "forceOutput", ParsePropertyImpl.BOOL));
239         addConfigOption(new Flag("ascii-chars", "asciiChars", ParsePropertyImpl.BOOL));
240         addConfigOption(new Flag("join-classes", "joinClasses", ParsePropertyImpl.BOOL));
241         addConfigOption(new Flag("join-styles", "joinStyles", ParsePropertyImpl.BOOL));
242         addConfigOption(new Flag("escape-cdata", "escapeCdata", ParsePropertyImpl.BOOL));
243         addConfigOption(new Flag("replace-color", "replaceColor", ParsePropertyImpl.BOOL));
244         addConfigOption(new Flag("quiet", "quiet", ParsePropertyImpl.BOOL));
245         addConfigOption(new Flag("tidy-mark", "tidyMark", ParsePropertyImpl.BOOL));
246         addConfigOption(new Flag("indent-attributes", "indentAttributes", ParsePropertyImpl.BOOL));
247         addConfigOption(new Flag("hide-endtags", "hideEndTags", ParsePropertyImpl.BOOL));
248         addConfigOption(new Flag("input-xml", "xmlTags", ParsePropertyImpl.BOOL));
249         addConfigOption(new Flag("output-xml", "xmlOut", ParsePropertyImpl.BOOL));
250         addConfigOption(new Flag("output-html", "htmlOut", ParsePropertyImpl.BOOL));
251         addConfigOption(new Flag("output-xhtml", "xHTML", ParsePropertyImpl.BOOL));
252         addConfigOption(new Flag("add-xml-pi", "xmlPi", ParsePropertyImpl.BOOL));
253         addConfigOption(new Flag("add-xml-decl", "xmlPi", ParsePropertyImpl.BOOL));
254         addConfigOption(new Flag("assume-xml-procins", "xmlPIs", ParsePropertyImpl.BOOL));
255         addConfigOption(new Flag("uppercase-tags", "upperCaseTags", ParsePropertyImpl.BOOL));
256         addConfigOption(new Flag("uppercase-attributes", "upperCaseAttrs", ParsePropertyImpl.BOOL));
257         addConfigOption(new Flag("bare", "makeBare", ParsePropertyImpl.BOOL));
258         addConfigOption(new Flag("clean", "makeClean", ParsePropertyImpl.BOOL));
259         addConfigOption(new Flag("logical-emphasis", "logicalEmphasis", ParsePropertyImpl.BOOL));
260         addConfigOption(new Flag("word-2000", "word2000", ParsePropertyImpl.BOOL));
261         addConfigOption(new Flag("drop-empty-paras", "dropEmptyParas", ParsePropertyImpl.BOOL));
262         addConfigOption(new Flag("drop-font-tags", "dropFontTags", ParsePropertyImpl.BOOL));
263         addConfigOption(new Flag("drop-proprietary-attributes", "dropProprietaryAttributes", ParsePropertyImpl.BOOL));
264         addConfigOption(new Flag("enclose-text", "encloseBodyText", ParsePropertyImpl.BOOL));
265         addConfigOption(new Flag("enclose-block-text", "encloseBlockText", ParsePropertyImpl.BOOL));
266         addConfigOption(new Flag("add-xml-space", "xmlSpace", ParsePropertyImpl.BOOL));
267         addConfigOption(new Flag("fix-bad-comments", "fixComments", ParsePropertyImpl.BOOL));
268         addConfigOption(new Flag("split", "burstSlides", ParsePropertyImpl.BOOL));
269         addConfigOption(new Flag("break-before-br", "breakBeforeBR", ParsePropertyImpl.BOOL));
270         addConfigOption(new Flag("numeric-entities", "numEntities", ParsePropertyImpl.BOOL));
271         addConfigOption(new Flag("quote-marks", "quoteMarks", ParsePropertyImpl.BOOL));
272         addConfigOption(new Flag("quote-nbsp", "quoteNbsp", ParsePropertyImpl.BOOL));
273         addConfigOption(new Flag("quote-ampersand", "quoteAmpersand", ParsePropertyImpl.BOOL));
274         addConfigOption(new Flag("write-back", "writeback", ParsePropertyImpl.BOOL));
275         addConfigOption(new Flag("keep-time", "keepFileTimes", ParsePropertyImpl.BOOL));
276         addConfigOption(new Flag("show-warnings", "showWarnings", ParsePropertyImpl.BOOL));
277         addConfigOption(new Flag("ncr", "ncr", ParsePropertyImpl.BOOL));
278         addConfigOption(new Flag("fix-backslash", "fixBackslash", ParsePropertyImpl.BOOL));
279         addConfigOption(new Flag("gnu-emacs", "emacs", ParsePropertyImpl.BOOL));
280         addConfigOption(new Flag("only-errors", "onlyErrors", ParsePropertyImpl.BOOL));
281         addConfigOption(new Flag("output-raw", "rawOut", ParsePropertyImpl.BOOL));
282         addConfigOption(new Flag("trim-empty-elements", "trimEmpty", ParsePropertyImpl.BOOL));
283 
284         addConfigOption(new Flag("markup", "onlyErrors", ParsePropertyImpl.INVBOOL));
285 
286         addConfigOption(new Flag("char-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
287         addConfigOption(new Flag("input-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
288         addConfigOption(new Flag("output-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
289 
290         addConfigOption(new Flag("error-file", "errfile", ParsePropertyImpl.NAME));
291         addConfigOption(new Flag("slide-style", "slidestyle", ParsePropertyImpl.NAME));
292         addConfigOption(new Flag("language", "language", ParsePropertyImpl.NAME));
293 
294         addConfigOption(new Flag("new-inline-tags", null, ParsePropertyImpl.TAGNAMES));
295         addConfigOption(new Flag("new-blocklevel-tags", null, ParsePropertyImpl.TAGNAMES));
296         addConfigOption(new Flag("new-empty-tags", null, ParsePropertyImpl.TAGNAMES));
297         addConfigOption(new Flag("new-pre-tags", null, ParsePropertyImpl.TAGNAMES));
298 
299         addConfigOption(new Flag("doctype", "docTypeStr", ParsePropertyImpl.DOCTYPE));
300 
301         addConfigOption(new Flag("repeated-attributes", "duplicateAttrs", ParsePropertyImpl.REPEATED_ATTRIBUTES));
302 
303         addConfigOption(new Flag("alt-text", "altText", ParsePropertyImpl.STRING));
304 
305         addConfigOption(new Flag("indent", "indentContent", ParsePropertyImpl.INDENT));
306 
307         addConfigOption(new Flag("css-prefix", "cssPrefix", ParsePropertyImpl.CSS1SELECTOR));
308 
309         addConfigOption(new Flag("newline", null, ParsePropertyImpl.NEWLINE));
310     }
311 
312     /***
313      * default indentation (2). Bound to the "indent-spaces" option.
314      */
315     protected int spaces = 2;
316 
317     /***
318      * default wrap margin (68). Bound to the "wrap" option; <code>adjust()</code> replaces 0 with the largest int.
319      */
320     protected int wraplen = 68;
321 
322     /***
323      * default tab size (8). Bound to the "tab-size" option.
324      */
325     protected int tabsize = 8;
326 
327     /***
328      * see doctype property. Initialized to <code>DOCTYPE_AUTO</code>.
329      */
330     protected int docTypeMode = DOCTYPE_AUTO;
331 
332     /***
333      * Keep first or last duplicate attribute (default <code>KEEP_LAST</code>). Bound to "repeated-attributes".
334      */
335     protected int duplicateAttrs = KEEP_LAST;
336 
337     /***
338      * default text for alt attribute. Bound to the "alt-text" option.
339      */
340     protected String altText;
341 
342     /***
343      * style sheet for slides. Bound to the "slide-style" option.
344      * @deprecated does nothing
345      */
346     protected String slidestyle;
347 
348     /***
349      * RJ language property. Bound to the "language" option.
350      */
351     protected String language; // #431953
352 
353     /***
354      * user specified doctype. Bound to the "doctype" option.
355      */
356     protected String docTypeStr;
357 
358     /***
359      * file name to write errors to. Bound to the "error-file" option.
360      */
361     protected String errfile;
362 
363     /***
364      * if true then output tidied markup. Bound to the "write-back" option.
365      */
366     protected boolean writeback;
367 
368     /***
369      * if true normal output is suppressed. Bound to "only-errors" and, inverted, to "markup".
370      */
371     protected boolean onlyErrors;
372 
373     /***
374      * show warnings (default true); however errors are always shown. Bound to the "show-warnings" option.
375      */
376     protected boolean showWarnings = true;
377 
378     /***
379      * no 'Parsing X', guessed DTD or summary. Bound to the "quiet" option.
380      */
381     protected boolean quiet;
382 
383     /***
384      * indent content of appropriate tags. Bound to "indent"; <code>adjust()</code> sets it when smartIndent is set.
385      */
386     protected boolean indentContent;
387 
388     /***
389      * does text/block level content affect indentation. When set, <code>adjust()</code> also sets indentContent.
390      */
391     protected boolean smartIndent;
392 
393     /***
394      * suppress optional end tags. Bound to "hide-endtags"; <code>adjust()</code> clears it when xmlOut is set.
395      */
396     protected boolean hideEndTags;
397 
398     /***
399      * treat input as XML. Bound to "input-xml"; when set, <code>adjust()</code> clears xHTML, sets xmlOut and xmlPIs.
400      */
401     protected boolean xmlTags;
402 
403     /***
404      * create output as XML. Bound to "output-xml"; <code>adjust()</code> sets it when xHTML or xmlTags is set.
405      */
406     protected boolean xmlOut;
407 
408     /***
409      * output extensible HTML. Bound to "output-xhtml"; <code>adjust()</code> clears it when xmlTags is set.
410      */
411     protected boolean xHTML;
412 
413     /***
414      * output plain-old HTML, even for XHTML input. Yes means set explicitly. Bound to the "output-html" option.
415      */
416     protected boolean htmlOut;
417 
418     /***
419      * add <code>&lt;?xml?&gt;</code> for XML docs. Bound to both "add-xml-pi" and "add-xml-decl".
420      */
421     protected boolean xmlPi;
422 
423     /***
424      * output tags in upper not lower case. Bound to "uppercase-tags"; cleared by <code>adjust()</code> for XHTML.
425      */
426     protected boolean upperCaseTags;
427 
428     /***
429      * output attributes in upper not lower case. Bound to "uppercase-attributes"; cleared by adjust() for XHTML.
430      */
431     protected boolean upperCaseAttrs;
432 
433     /***
434      * remove presentational clutter. Bound to the "clean" option.
435      */
436     protected boolean makeClean;
437 
438     /***
439      * Make bare HTML: remove Microsoft cruft. Bound to the "bare" option.
440      */
441     protected boolean makeBare;
442 
443     /***
444      * replace i by em and b by strong. Bound to the "logical-emphasis" option.
445      */
446     protected boolean logicalEmphasis;
447 
448     /***
449      * discard presentation tags. Bound to the "drop-font-tags" option.
450      */
451     protected boolean dropFontTags;
452 
453     /***
454      * discard proprietary attributes. Bound to the "drop-proprietary-attributes" option.
455      */
456     protected boolean dropProprietaryAttributes;
457 
458     /***
459      * discard empty p elements (default true). Bound to the "drop-empty-paras" option.
460      */
461     protected boolean dropEmptyParas = true;
462 
463     /***
464      * fix comments with adjacent hyphens (default true). Bound to the "fix-bad-comments" option.
465      */
466     protected boolean fixComments = true;
467 
468     /***
469      * trim empty elements (default true). Bound to the "trim-empty-elements" option.
470      */
471     protected boolean trimEmpty = true;
472 
473     /***
474      * o/p newline before br or not? Bound to the "break-before-br" option.
475      */
476     protected boolean breakBeforeBR;
477 
478     /***
479      * create slides on each h2 element. Bound to the "split" option.
480      */
481     protected boolean burstSlides;
482 
483     /***
484      * use numeric entities. Bound to the "numeric-entities" option.
485      */
486     protected boolean numEntities;
487 
488     /***
489      * output " marks as &quot;. Bound to the "quote-marks" option.
490      */
491     protected boolean quoteMarks;
492 
493     /***
494      * output non-breaking space as entity (default true). Bound to the "quote-nbsp" option.
495      */
496     protected boolean quoteNbsp = true;
497 
498     /***
499      * output naked ampersand as &amp; (default true). Bound to "quote-ampersand"; forced on by adjust() for xmlOut.
500      */
501     protected boolean quoteAmpersand = true;
502 
503     /***
504      * wrap within attribute values. Bound to the "wrap-attributes" option.
505      */
506     protected boolean wrapAttVals;
507 
508     /***
509      * wrap within JavaScript string literals. Bound to the "wrap-script-literals" option.
510      */
511     protected boolean wrapScriptlets;
512 
513     /***
514      * wrap within CDATA section tags (default true). Bound to the "wrap-sections" option.
515      */
516     protected boolean wrapSection = true;
517 
518     /***
519      * wrap within ASP pseudo elements (default true). Bound to the "wrap-asp" option.
520      */
521     protected boolean wrapAsp = true;
522 
523     /***
524      * wrap within JSTE pseudo elements (default true). Bound to the "wrap-jste" option.
525      */
526     protected boolean wrapJste = true;
527 
528     /***
529      * wrap within PHP pseudo elements (default true). Bound to the "wrap-php" option.
530      */
531     protected boolean wrapPhp = true;
532 
533     /***
534      * fix URLs by replacing \ with / (default true). Bound to the "fix-backslash" option.
535      */
536     protected boolean fixBackslash = true;
537 
538     /***
539      * newline+indent before each attribute. Bound to the "indent-attributes" option.
540      */
541     protected boolean indentAttributes;
542 
543     /***
544      * If set to yes PIs must end with <code>?&gt;</code>. Bound to "assume-xml-procins"; set by adjust() for xmlTags.
545      */
546     protected boolean xmlPIs;
547 
548     /***
549      * if set to yes adds xml:space attr as needed. Bound to the "add-xml-space" option.
550      */
551     protected boolean xmlSpace;
552 
553     /***
554      * if yes text at body is wrapped in p's. Bound to "enclose-text"; set by adjust() when encloseBlockText is set.
555      */
556     protected boolean encloseBodyText;
557 
558     /***
559      * if yes text in blocks is wrapped in p's. Bound to the "enclose-block-text" option.
560      */
561     protected boolean encloseBlockText;
562 
563     /***
564      * if yes last modified time is preserved (default true). Bound to the "keep-time" option.
565      */
566     protected boolean keepFileTimes = true;
567 
568     /***
569      * draconian cleaning for Word2000. Bound to "word-2000"; when set, adjust() defines "o:p" as an inline tag.
570      */
571     protected boolean word2000;
572 
573     /***
574      * add meta element indicating tidied doc (default true). Bound to the "tidy-mark" option.
575      */
576     protected boolean tidyMark = true;
577 
578     /***
579      * if true format error output for GNU Emacs. Bound to the "gnu-emacs" option.
580      */
581     protected boolean emacs;
582 
583     /***
584      * if true attributes may use newlines. Bound to the "literal-attributes" option.
585      */
586     protected boolean literalAttribs;
587 
588     /***
589      * output BODY content only. Bound to the "show-body-only" option.
590      */
591     protected boolean bodyOnly;
592 
593     /***
594      * properly escape URLs (default true). Bound to the "fix-uri" option.
595      */
596     protected boolean fixUri = true;
597 
598     /***
599      * folds known attribute values to lower case (default true). Bound to the "lower-literals" option.
600      */
601     protected boolean lowerLiterals = true;
602 
603     /***
604      * replace hex color attribute values with names. Bound to the "replace-color" option.
605      */
606     protected boolean replaceColor;
607 
608     /***
609      * hides all (real) comments in output. Bound to the "hide-comments" option.
610      */
611     protected boolean hideComments;
612 
613     /***
614      * indent CDATA sections. Bound to the "indent-cdata" option.
615      */
616     protected boolean indentCdata;
617 
618     /***
619      * output document even if errors were found. Bound to the "force-output" option.
620      */
621     protected boolean forceOutput;
622 
623     /***
624      * number of errors to put out (default 6). Bound to the "show-errors" option.
625      */
626     protected int showErrors = 6;
627 
628     /***
629      * convert quotes and dashes to nearest ASCII char (default true). Bound to the "ascii-chars" option.
630      */
631     protected boolean asciiChars = true;
632 
633     /***
634      * join multiple class attributes. Bound to the "join-classes" option.
635      */
636     protected boolean joinClasses;
637 
638     /***
639      * join multiple style attributes (default true). Bound to the "join-styles" option.
640      */
641     protected boolean joinStyles = true;
642 
643     /***
644      * replace CDATA sections with escaped text (default true). Bound to the "escape-cdata" option.
645      */
646     protected boolean escapeCdata = true;
647 
648     /***
649      * allow numeric character references (default true). Bound to the "ncr" option.
650      */
651     protected boolean ncr = true; // #431953
652 
653     /***
654      * CSS class naming for -clean option. Bound to the "css-prefix" option.
655      */
656     protected String cssPrefix;
657 
658     /***
659      * char encoding used when replacing illegal SGML chars, regardless of specified encoding (default WIN1252).
660      */
661     protected int replacementCharEncoding = WIN1252; // by default
662 
663     /***
664      * TagTable associated with this Configuration. <code>adjust()</code> uses it to define "o:p" for word2000.
665      */
666     protected TagTable tt;
667 
668     /***
669      * Report instance. Used for messages. Assigned by the constructor.
670      */
671     protected Report report;
672 
673     /***
674      * track what types of tags user has defined to eliminate unnecessary searches. Bit set of Dict.TAGTYPE_* flags.
675      */
676     protected int definedTags;
677 
678     /***
679      * characters of the newline marker; initialized from the "line.separator" system property.
680      */
681     protected char[] newline = (System.getProperty("line.separator")).toCharArray();
682 
683     /***
684      * Input character encoding (defaults to LATIN1).
685      */
686     private int inCharEncoding = LATIN1;
687 
688     /***
689      * Input character encoding name (defaults to "ISO8859_1").
690      */
691     private String inCharEncodingName = "ISO8859_1";
692 
693     /***
694      * Output character encoding (defaults to ASCII).
695      */
696     private int outCharEncoding = ASCII;
697 
698     /***
699      * Output character encoding name (defaults to "ASCII").
700      */
701     private String outCharEncodingName = "ASCII";
702 
703     /***
704      * Avoid mapping values > 127 to entities. Bound to the "output-raw" option.
705      */
706     protected boolean rawOut;
707 
708     /***
709      * configuration properties; filled by addProps and parseFile and read by parseProps.
         * NOTE(review): the field is transient and only initialized inline, so it is presumably null on a
         * deserialized instance - confirm whether anything restores it before addProps/parseFile are used.
710      */
711     private transient Properties properties = new Properties();
712 
713     /***
714      * Instantiates a new Configuration. This method should be called by Tidy only.
715      * @param report Report instance
716      */
717     protected Configuration(Report report)
718     {
719         this.report = report;
720     }
721 
722     /***
723      * adds a config option to the map.
724      * @param flag configuration options added
725      */
726     private static void addConfigOption(Flag flag)
727     {
728         OPTIONS.put(flag.getName(), flag);
729     }
730 
731     /***
732      * adds configuration Properties.
733      * @param p Properties
734      */
735     public void addProps(Properties p)
736     {
737         Enumeration propEnum = p.propertyNames();
738         while (propEnum.hasMoreElements())
739         {
740             String key = (String) propEnum.nextElement();
741             String value = p.getProperty(key);
742             properties.put(key, value);
743         }
744         parseProps();
745     }
746 
747     /***
748      * Parses a property file.
749      * @param filename file name
750      */
751     public void parseFile(String filename)
752     {
753         try
754         {
755             properties.load(new FileInputStream(filename));
756         }
757         catch (IOException e)
758         {
759             System.err.println(filename + " " + e.toString());
760             return;
761         }
762         parseProps();
763     }
764 
765     /***
766      * Is the given String a valid configuration flag?
767      * @param name configuration parameter name
768      * @return <code>true</code> if the given String is a valid config option
769      */
770     public static boolean isKnownOption(String name)
771     {
772         return name != null && OPTIONS.containsKey(name);
773     }
774 
775     /***
776      * Parses the configuration properties file.
777      */
778     private void parseProps()
779     {
780         Iterator iterator = properties.keySet().iterator();
781 
782         while (iterator.hasNext())
783         {
784             String key = (String) iterator.next();
785             Flag flag = (Flag) OPTIONS.get(key);
786             if (flag == null)
787             {
788                 report.unknownOption(key);
789                 continue;
790             }
791 
792             String stringValue = properties.getProperty(key);
793             Object value = flag.getParser().parse(stringValue, key, this);
794             if (flag.getLocation() != null)
795             {
796                 try
797                 {
798                     flag.getLocation().set(this, value);
799                 }
800                 catch (IllegalArgumentException e)
801                 {
802                     throw new RuntimeException("IllegalArgumentException during config initialization for field "
803                         + key
804                         + "with value ["
805                         + value
806                         + "]: "
807                         + e.getMessage());
808                 }
809                 catch (IllegalAccessException e)
810                 {
811                     throw new RuntimeException("IllegalArgumentException during config initialization for field "
812                         + key
813                         + "with value ["
814                         + value
815                         + "]: "
816                         + e.getMessage());
817                 }
818             }
819         }
820     }
821 
822     /***
823      * Ensure that config is self consistent.
824      */
825     public void adjust()
826     {
827         if (encloseBlockText)
828         {
829             encloseBodyText = true;
830         }
831 
832         // avoid the need to set IndentContent when SmartIndent is set
833         if (smartIndent)
834         {
835             indentContent = true;
836         }
837 
838         // disable wrapping
839         if (wraplen == 0)
840         {
841             wraplen = 0x7FFFFFFF;
842         }
843 
844         // Word 2000 needs o:p to be declared as inline
845         if (word2000)
846         {
847             definedTags |= Dict.TAGTYPE_INLINE;
848             tt.defineTag(Dict.TAGTYPE_INLINE, "o:p");
849         }
850 
851         // #480701 disable XHTML output flag if both output-xhtml and xml are set
852         if (xmlTags)
853         {
854             xHTML = false;
855         }
856 
857         // XHTML is written in lower case
858         if (xHTML)
859         {
860             xmlOut = true;
861             upperCaseTags = false;
862             upperCaseAttrs = false;
863         }
864 
865         // if XML in, then XML out
866         if (xmlTags)
867         {
868             xmlOut = true;
869             xmlPIs = true;
870         }
871 
872         // #427837 - fix by Dave Raggett 02 Jun 01
873         // generate <?xml version="1.0" encoding="iso-8859-1"?> if the output character encoding is Latin-1 etc.
874         if (getOutCharEncoding() != UTF8 && getOutCharEncoding() != ASCII && xmlOut)
875         {
876             xmlPi = true;
877         }
878 
879         // XML requires end tags
880         if (xmlOut)
881         {
882             quoteAmpersand = true;
883             hideEndTags = false;
884         }
885     }
886 
887     /***
888      * prints available configuration options.
889      * @param errout where to write
890      * @param showActualConfiguration print actual configuration values
891      */
892     void printConfigOptions(Writer errout, boolean showActualConfiguration)
893     {
894         String pad = "                                                                               ";
895         try
896         {
897             errout.write("\nConfiguration File Settings:\n\n");
898 
899             if (showActualConfiguration)
900             {
901                 errout.write("Name                        Type       Current Value\n");
902             }
903             else
904             {
905                 errout.write("Name                        Type       Allowable values\n");
906             }
907 
908             errout.write("=========================== =========  ========================================\n");
909 
910             Flag configItem;
911 
912             // sort configuration options
913             List values = new ArrayList(OPTIONS.values());
914             Collections.sort(values);
915 
916             Iterator iterator = values.iterator();
917 
918             while (iterator.hasNext())
919             {
920                 configItem = (Flag) iterator.next();
921 
922                 errout.write(configItem.getName());
923                 errout.write(pad, 0, 28 - configItem.getName().length());
924 
925                 errout.write(configItem.getParser().getType());
926                 errout.write(pad, 0, 11 - configItem.getParser().getType().length());
927 
928                 if (showActualConfiguration)
929                 {
930                     Field field = configItem.getLocation();
931                     Object actualValue = null;
932 
933                     if (field != null)
934                     {
935                         try
936                         {
937                             actualValue = field.get(this);
938                         }
939                         catch (IllegalArgumentException e1)
940                         {
941                             // should never happen
942                             throw new RuntimeException("IllegalArgument when reading field " + field.getName());
943                         }
944                         catch (IllegalAccessException e1)
945                         {
946                             // should never happen
947                             throw new RuntimeException("IllegalAccess when reading field " + field.getName());
948                         }
949                     }
950 
951                     errout.write(configItem.getParser().getFriendlyName(configItem.getName(), actualValue, this));
952                 }
953                 else
954                 {
955                     errout.write(configItem.getParser().getOptionValues());
956                 }
957 
958                 errout.write("\n");
959 
960             }
961             errout.flush();
962         }
963         catch (IOException e)
964         {
965             throw new RuntimeException(e.getMessage());
966         }
967 
968     }
969 
970     /***
971      * A configuration option.
972      */
973     static class Flag implements Comparable
974     {
975 
976         /***
977          * option name.
978          */
979         private String name;
980 
981         /***
982          * field name.
983          */
984         private String fieldName;
985 
986         /***
987          * Field where the evaluated value is saved.
988          */
989         private Field location;
990 
991         /***
992          * Parser for the configuration property.
993          */
994         private ParseProperty parser;
995 
996         /***
997          * Instantiates a new Flag.
998          * @param name option name
999          * @param fieldName field name (can be null)
1000          * @param parser parser for property
1001          */
1002         Flag(String name, String fieldName, ParseProperty parser)
1003         {
1004 
1005             this.fieldName = fieldName;
1006             this.name = name;
1007             this.parser = parser;
1008         }
1009 
1010         /***
1011          * Getter for <code>location</code>.
1012          * @return Returns the location.
1013          */
1014         public Field getLocation()
1015         {
1016             // lazy initialization to speed up loading
1017             if (fieldName != null && this.location == null)
1018             {
1019                 try
1020                 {
1021                     this.location = Configuration.class.getDeclaredField(fieldName);
1022                 }
1023                 catch (NoSuchFieldException e)
1024                 {
1025                     throw new RuntimeException("NoSuchField exception during config initialization for field "
1026                         + fieldName);
1027                 }
1028                 catch (SecurityException e)
1029                 {
1030                     throw new RuntimeException("Security exception during config initialization for field "
1031                         + fieldName
1032                         + ": "
1033                         + e.getMessage());
1034                 }
1035             }
1036 
1037             return this.location;
1038         }
1039 
1040         /***
1041          * Getter for <code>name</code>.
1042          * @return Returns the name.
1043          */
1044         public String getName()
1045         {
1046             return this.name;
1047         }
1048 
1049         /***
1050          * Getter for <code>parser</code>.
1051          * @return Returns the parser.
1052          */
1053         public ParseProperty getParser()
1054         {
1055             return this.parser;
1056         }
1057 
1058         /***
1059          * @see java.lang.Object#equals(java.lang.Object)
1060          */
1061         public boolean equals(Object obj)
1062         {
1063             return this.name.equals(((Flag) obj).name);
1064         }
1065 
1066         /***
1067          * @see java.lang.Object#hashCode()
1068          */
1069         public int hashCode()
1070         {
1071             // returning the hashCode of String, to be consistent with equals and compareTo
1072             return this.name.hashCode();
1073         }
1074 
1075         /***
1076          * @see java.lang.Comparable#compareTo(java.lang.Object)
1077          */
1078         public int compareTo(Object o)
1079         {
1080             return this.name.compareTo(((Flag) o).name);
1081         }
1082 
1083     }
1084 
1085     /***
1086      * Getter for <code>inCharEncoding</code>.
1087      * @return Returns the inCharEncoding.
1088      * @deprecated use getInCharEncodingName()
1089      */
1090     protected int getInCharEncoding()
1091     {
1092         return this.inCharEncoding;
1093     }
1094 
1095     /***
1096      * Setter for <code>inCharEncoding</code>.
1097      * @param encoding The inCharEncoding to set.
1098      * @deprecated use setInCharEncodingName(String)
1099      */
1100     protected void setInCharEncoding(int encoding)
1101     {
1102         if (encoding == RAW)
1103         {
1104             rawOut = true;
1105         }
1106         else
1107         {
1108             rawOut = false;
1109             this.inCharEncoding = encoding;
1110         }
1111     }
1112 
1113     /***
1114      * Getter for <code>inCharEncodingName</code>.
1115      * @return Returns the inCharEncodingName.
1116      */
1117     protected String getInCharEncodingName()
1118     {
1119         return this.inCharEncodingName;
1120     }
1121 
1122     /***
1123      * Setter for <code>inCharEncodingName</code>.
1124      * @param encoding The inCharEncodingName to set.
1125      */
1126     protected void setInCharEncodingName(String encoding)
1127     {
1128         String javaEncoding = EncodingNameMapper.toJava(encoding);
1129         if (javaEncoding != null)
1130         {
1131             this.inCharEncodingName = javaEncoding;
1132             this.inCharEncoding = convertCharEncoding(javaEncoding);
1133         }
1134     }
1135 
1136     /***
1137      * Getter for <code>outCharEncoding</code>.
1138      * @return Returns the outCharEncoding.
1139      * @deprecated use getOutCharEncodingName()
1140      */
1141     protected int getOutCharEncoding()
1142     {
1143         return this.outCharEncoding;
1144     }
1145 
1146     /***
1147      * Setter for <code>outCharEncoding</code>.
1148      * @param encoding The outCharEncoding to set.
1149      * @deprecated use setOutCharEncodingName(String)
1150      */
1151     protected void setOutCharEncoding(int encoding)
1152     {
1153         switch (encoding)
1154         {
1155             case RAW :
1156                 this.rawOut = true;
1157                 break;
1158 
1159             case MACROMAN :
1160             case WIN1252 :
1161                 this.rawOut = false;
1162                 this.outCharEncoding = ASCII;
1163                 break;
1164 
1165             default :
1166                 this.rawOut = false;
1167                 this.outCharEncoding = encoding;
1168                 break;
1169         }
1170     }
1171 
1172     /***
1173      * Getter for <code>outCharEncodingName</code>.
1174      * @return Returns the outCharEncodingName.
1175      */
1176     protected String getOutCharEncodingName()
1177     {
1178         return this.outCharEncodingName;
1179     }
1180 
1181     /***
1182      * Setter for <code>outCharEncodingName</code>.
1183      * @param encoding The outCharEncodingName to set.
1184      */
1185     protected void setOutCharEncodingName(String encoding)
1186     {
1187         String javaEncoding = EncodingNameMapper.toJava(encoding);
1188         if (javaEncoding != null)
1189         {
1190             this.outCharEncodingName = javaEncoding;
1191             this.outCharEncoding = convertCharEncoding(javaEncoding);
1192         }
1193     }
1194 
1195     /***
1196      * Setter for <code>inOutCharEncodingName</code>.
1197      * @param encoding The CharEncodingName to set.
1198      */
1199     protected void setInOutEncodingName(String encoding)
1200     {
1201         setInCharEncodingName(encoding);
1202         setOutCharEncodingName(encoding);
1203     }
1204 
1205     /***
1206      * Convert a char encoding from the deprecated tidy constant to a standard java encoding name.
1207      * @param code encoding code
1208      * @return encoding name
1209      */
1210     protected String convertCharEncoding(int code)
1211     {
1212         if (code != 0 && code < ENCODING_NAMES.length)
1213         {
1214             return ENCODING_NAMES[code];
1215         }
1216         return null;
1217     }
1218 
1219     /***
1220      * Convert a char encoding from a standard java encoding name to the deprecated tidy constant.
1221      * @param name encoding name
1222      * @return encoding code
1223      */
1224     protected int convertCharEncoding(String name)
1225     {
1226         if (name == null)
1227         {
1228             return -1;
1229         }
1230 
1231         for (int j = 1; j < ENCODING_NAMES.length; j++)
1232         {
1233             if (name.equals(ENCODING_NAMES[j]))
1234             {
1235                 return j;
1236             }
1237         }
1238 
1239         return -1;
1240     }
1241 
1242 }