View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  import java.io.FileInputStream;
57  import java.io.IOException;
58  import java.io.Serializable;
59  import java.io.Writer;
60  import java.lang.reflect.Field;
61  import java.util.ArrayList;
62  import java.util.Collections;
63  import java.util.Enumeration;
64  import java.util.HashMap;
65  import java.util.Iterator;
66  import java.util.List;
67  import java.util.Map;
68  import java.util.Properties;
69  
70  
71  /**
72   * Read configuration file and manage configuration properties. Configuration files associate a property name with a
73   * value. The format is that of a Java .properties file.
74   * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
75   * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
76   * @author Fabrizio Giustina
77   * @version $Revision: 807 $ ($Author: fgiust $)
78   */
79  public class Configuration implements Serializable
80  {
81  
82      /**
83       * character encoding = RAW.
84       * @deprecated use <code>Tidy.setRawOut(true)</code> for raw output
85       */
86      public static final int RAW = 0;
87  
88      /**
89       * character encoding = ASCII.
90       * @deprecated
91       */
92      public static final int ASCII = 1;
93  
94      /**
95       * character encoding = LATIN1.
96       * @deprecated
97       */
98      public static final int LATIN1 = 2;
99  
100     /**
101      * character encoding = UTF8.
102      * @deprecated
103      */
104     public static final int UTF8 = 3;
105 
106     /**
107      * character encoding = ISO2022.
108      * @deprecated
109      */
110     public static final int ISO2022 = 4;
111 
112     /**
113      * character encoding = MACROMAN.
114      * @deprecated
115      */
116     public static final int MACROMAN = 5;
117 
118     /**
119      * character encoding = UTF16LE.
120      * @deprecated
121      */
122     public static final int UTF16LE = 6;
123 
124     /**
125      * character encoding = UTF16BE.
126      * @deprecated
127      */
128     public static final int UTF16BE = 7;
129 
130     /**
131      * character encoding = UTF16.
132      * @deprecated
133      */
134     public static final int UTF16 = 8;
135 
136     /**
137      * character encoding = WIN1252.
138      * @deprecated
139      */
140     public static final int WIN1252 = 9;
141 
142     /**
143      * character encoding = BIG5.
144      * @deprecated
145      */
146     public static final int BIG5 = 10;
147 
148     /**
149      * character encoding = SHIFTJIS.
150      * @deprecated
151      */
152     public static final int SHIFTJIS = 11;
153 
154     /**
155      * Convert from deprecated tidy encoding constant to standard java encoding name.
156      */
157     private final String[] ENCODING_NAMES = new String[]{"raw", // rawOut, it will not be mapped to a java encoding
158         "ASCII",
159         "ISO8859_1",
160         "UTF8",
161         "JIS",
162         "MacRoman",
163         "UnicodeLittle",
164         "UnicodeBig",
165         "Unicode",
166         "Cp1252",
167         "Big5",
168         "SJIS"};
169 
170     /**
171      * treatment of doctype: omit.
172      * @todo should be an enumeration DocTypeMode
173      */
174     public static final int DOCTYPE_OMIT = 0;
175 
176     /**
177      * treatment of doctype: auto.
178      */
179     public static final int DOCTYPE_AUTO = 1;
180 
181     /**
182      * treatment of doctype: strict.
183      */
184     public static final int DOCTYPE_STRICT = 2;
185 
186     /**
187      * treatment of doctype: loose.
188      */
189     public static final int DOCTYPE_LOOSE = 3;
190 
191     /**
192      * treatment of doctype: user.
193      */
194     public static final int DOCTYPE_USER = 4;
195 
196     /**
197      * Keep last duplicate attribute.
198      * @todo should be an enumeration DupAttrMode
199      */
200     public static final int KEEP_LAST = 0;
201 
202     /**
203      * Keep first duplicate attribute.
204      */
205     public static final int KEEP_FIRST = 1;
206 
207     /**
208      * Map containing all the valid configuration options and the related parser. Tag entry contains String(option
209      * name)-Flag instance.
210      */
211     private static final Map OPTIONS = new HashMap();
212 
213     /**
214      * serial version UID for this class.
215      */
216     private static final long serialVersionUID = -4955155037138560842L;
217 
218     static
219     {
220         addConfigOption(new Flag("indent-spaces", "spaces", ParsePropertyImpl.INT));
221         addConfigOption(new Flag("wrap", "wraplen", ParsePropertyImpl.INT));
222         addConfigOption(new Flag("show-errors", "showErrors", ParsePropertyImpl.INT));
223         addConfigOption(new Flag("tab-size", "tabsize", ParsePropertyImpl.INT));
224 
225         addConfigOption(new Flag("wrap-attributes", "wrapAttVals", ParsePropertyImpl.BOOL));
226         addConfigOption(new Flag("wrap-script-literals", "wrapScriptlets", ParsePropertyImpl.BOOL));
227         addConfigOption(new Flag("wrap-sections", "wrapSection", ParsePropertyImpl.BOOL));
228         addConfigOption(new Flag("wrap-asp", "wrapAsp", ParsePropertyImpl.BOOL));
229         addConfigOption(new Flag("wrap-jste", "wrapJste", ParsePropertyImpl.BOOL));
230         addConfigOption(new Flag("wrap-php", "wrapPhp", ParsePropertyImpl.BOOL));
231         addConfigOption(new Flag("literal-attributes", "literalAttribs", ParsePropertyImpl.BOOL));
232         addConfigOption(new Flag("show-body-only", "bodyOnly", ParsePropertyImpl.BOOL));
233         addConfigOption(new Flag("fix-uri", "fixUri", ParsePropertyImpl.BOOL));
234         addConfigOption(new Flag("lower-literals", "lowerLiterals", ParsePropertyImpl.BOOL));
235         addConfigOption(new Flag("hide-comments", "hideComments", ParsePropertyImpl.BOOL));
236         addConfigOption(new Flag("indent-cdata", "indentCdata", ParsePropertyImpl.BOOL));
237         addConfigOption(new Flag("force-output", "forceOutput", ParsePropertyImpl.BOOL));
238         addConfigOption(new Flag("ascii-chars", "asciiChars", ParsePropertyImpl.BOOL));
239         addConfigOption(new Flag("join-classes", "joinClasses", ParsePropertyImpl.BOOL));
240         addConfigOption(new Flag("join-styles", "joinStyles", ParsePropertyImpl.BOOL));
241         addConfigOption(new Flag("escape-cdata", "escapeCdata", ParsePropertyImpl.BOOL));
242         addConfigOption(new Flag("replace-color", "replaceColor", ParsePropertyImpl.BOOL));
243         addConfigOption(new Flag("quiet", "quiet", ParsePropertyImpl.BOOL));
244         addConfigOption(new Flag("tidy-mark", "tidyMark", ParsePropertyImpl.BOOL));
245         addConfigOption(new Flag("indent-attributes", "indentAttributes", ParsePropertyImpl.BOOL));
246         addConfigOption(new Flag("hide-endtags", "hideEndTags", ParsePropertyImpl.BOOL));
247         addConfigOption(new Flag("input-xml", "xmlTags", ParsePropertyImpl.BOOL));
248         addConfigOption(new Flag("output-xml", "xmlOut", ParsePropertyImpl.BOOL));
249         addConfigOption(new Flag("output-html", "htmlOut", ParsePropertyImpl.BOOL));
250         addConfigOption(new Flag("output-xhtml", "xHTML", ParsePropertyImpl.BOOL));
251         addConfigOption(new Flag("add-xml-pi", "xmlPi", ParsePropertyImpl.BOOL));
252         addConfigOption(new Flag("add-xml-decl", "xmlPi", ParsePropertyImpl.BOOL));
253         addConfigOption(new Flag("assume-xml-procins", "xmlPIs", ParsePropertyImpl.BOOL));
254         addConfigOption(new Flag("uppercase-tags", "upperCaseTags", ParsePropertyImpl.BOOL));
255         addConfigOption(new Flag("uppercase-attributes", "upperCaseAttrs", ParsePropertyImpl.BOOL));
256         addConfigOption(new Flag("bare", "makeBare", ParsePropertyImpl.BOOL));
257         addConfigOption(new Flag("clean", "makeClean", ParsePropertyImpl.BOOL));
258         addConfigOption(new Flag("logical-emphasis", "logicalEmphasis", ParsePropertyImpl.BOOL));
259         addConfigOption(new Flag("word-2000", "word2000", ParsePropertyImpl.BOOL));
260         addConfigOption(new Flag("drop-empty-paras", "dropEmptyParas", ParsePropertyImpl.BOOL));
261         addConfigOption(new Flag("drop-font-tags", "dropFontTags", ParsePropertyImpl.BOOL));
262         addConfigOption(new Flag("drop-proprietary-attributes", "dropProprietaryAttributes", ParsePropertyImpl.BOOL));
263         addConfigOption(new Flag("enclose-text", "encloseBodyText", ParsePropertyImpl.BOOL));
264         addConfigOption(new Flag("enclose-block-text", "encloseBlockText", ParsePropertyImpl.BOOL));
265         addConfigOption(new Flag("add-xml-space", "xmlSpace", ParsePropertyImpl.BOOL));
266         addConfigOption(new Flag("fix-bad-comments", "fixComments", ParsePropertyImpl.BOOL));
267         addConfigOption(new Flag("split", "burstSlides", ParsePropertyImpl.BOOL));
268         addConfigOption(new Flag("break-before-br", "breakBeforeBR", ParsePropertyImpl.BOOL));
269         addConfigOption(new Flag("numeric-entities", "numEntities", ParsePropertyImpl.BOOL));
270         addConfigOption(new Flag("quote-marks", "quoteMarks", ParsePropertyImpl.BOOL));
271         addConfigOption(new Flag("quote-nbsp", "quoteNbsp", ParsePropertyImpl.BOOL));
272         addConfigOption(new Flag("quote-ampersand", "quoteAmpersand", ParsePropertyImpl.BOOL));
273         addConfigOption(new Flag("write-back", "writeback", ParsePropertyImpl.BOOL));
274         addConfigOption(new Flag("keep-time", "keepFileTimes", ParsePropertyImpl.BOOL));
275         addConfigOption(new Flag("show-warnings", "showWarnings", ParsePropertyImpl.BOOL));
276         addConfigOption(new Flag("ncr", "ncr", ParsePropertyImpl.BOOL));
277         addConfigOption(new Flag("fix-backslash", "fixBackslash", ParsePropertyImpl.BOOL));
278         addConfigOption(new Flag("gnu-emacs", "emacs", ParsePropertyImpl.BOOL));
279         addConfigOption(new Flag("only-errors", "onlyErrors", ParsePropertyImpl.BOOL));
280         addConfigOption(new Flag("output-raw", "rawOut", ParsePropertyImpl.BOOL));
281         addConfigOption(new Flag("trim-empty-elements", "trimEmpty", ParsePropertyImpl.BOOL));
282 
283         addConfigOption(new Flag("markup", "onlyErrors", ParsePropertyImpl.INVBOOL));
284 
285         addConfigOption(new Flag("char-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
286         addConfigOption(new Flag("input-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
287         addConfigOption(new Flag("output-encoding", null, ParsePropertyImpl.CHAR_ENCODING));
288 
289         addConfigOption(new Flag("error-file", "errfile", ParsePropertyImpl.NAME));
290         addConfigOption(new Flag("slide-style", "slidestyle", ParsePropertyImpl.NAME));
291         addConfigOption(new Flag("language", "language", ParsePropertyImpl.NAME));
292 
293         addConfigOption(new Flag("new-inline-tags", null, ParsePropertyImpl.TAGNAMES));
294         addConfigOption(new Flag("new-blocklevel-tags", null, ParsePropertyImpl.TAGNAMES));
295         addConfigOption(new Flag("new-empty-tags", null, ParsePropertyImpl.TAGNAMES));
296         addConfigOption(new Flag("new-pre-tags", null, ParsePropertyImpl.TAGNAMES));
297 
298         addConfigOption(new Flag("doctype", "docTypeStr", ParsePropertyImpl.DOCTYPE));
299 
300         addConfigOption(new Flag("repeated-attributes", "duplicateAttrs", ParsePropertyImpl.REPEATED_ATTRIBUTES));
301 
302         addConfigOption(new Flag("alt-text", "altText", ParsePropertyImpl.STRING));
303 
304         addConfigOption(new Flag("indent", "indentContent", ParsePropertyImpl.INDENT));
305 
306         addConfigOption(new Flag("css-prefix", "cssPrefix", ParsePropertyImpl.CSS1SELECTOR));
307 
308         addConfigOption(new Flag("newline", null, ParsePropertyImpl.NEWLINE));
309     }
310 
311     /**
312      * default indentation.
313      */
314     protected int spaces = 2;
315 
316     /**
317      * default wrap margin (68).
318      */
319     protected int wraplen = 68;
320 
321     /**
322      * default tab size (8).
323      */
324     protected int tabsize = 8;
325 
326     /**
327      * see doctype property.
328      */
329     protected int docTypeMode = DOCTYPE_AUTO;
330 
331     /**
332      * Keep first or last duplicate attribute.
333      */
334     protected int duplicateAttrs = KEEP_LAST;
335 
336     /**
337      * default text for alt attribute.
338      */
339     protected String altText;
340 
341     /**
342      * style sheet for slides.
343      * @deprecated does nothing
344      */
345     protected String slidestyle;
346 
347     /**
348      * RJ language property.
349      */
350     protected String language; // #431953
351 
352     /**
353      * user specified doctype.
354      */
355     protected String docTypeStr;
356 
357     /**
358      * file name to write errors to.
359      */
360     protected String errfile;
361 
362     /**
363      * if true then output tidied markup.
364      */
365     protected boolean writeback;
366 
367     /**
368      * if true normal output is suppressed.
369      */
370     protected boolean onlyErrors;
371 
372     /**
373      * if true warnings are shown; errors are always shown.
374      */
375     protected boolean showWarnings = true;
376 
377     /**
378      * no 'Parsing X', guessed DTD or summary.
379      */
380     protected boolean quiet;
381 
382     /**
383      * indent content of appropriate tags.
384      */
385     protected boolean indentContent;
386 
387     /**
388      * does text/block level content affect indentation.
389      */
390     protected boolean smartIndent;
391 
392     /**
393      * suppress optional end tags.
394      */
395     protected boolean hideEndTags;
396 
397     /**
398      * treat input as XML.
399      */
400     protected boolean xmlTags;
401 
402     /**
403      * create output as XML.
404      */
405     protected boolean xmlOut;
406 
407     /**
408      * output extensible HTML.
409      */
410     protected boolean xHTML;
411 
412     /**
413      * output plain-old HTML, even for XHTML input. Yes means set explicitly.
414      */
415     protected boolean htmlOut;
416 
417     /**
418      * add <code>&lt;?xml?&gt;</code> for XML docs.
419      */
420     protected boolean xmlPi;
421 
422     /**
423      * output tags in upper not lower case.
424      */
425     protected boolean upperCaseTags;
426 
427     /**
428      * output attributes in upper not lower case.
429      */
430     protected boolean upperCaseAttrs;
431 
432     /**
433      * remove presentational clutter.
434      */
435     protected boolean makeClean;
436 
437     /**
438      * Make bare HTML: remove Microsoft cruft.
439      */
440     protected boolean makeBare;
441 
442     /**
443      * replace i by em and b by strong.
444      */
445     protected boolean logicalEmphasis;
446 
447     /**
448      * discard presentation tags.
449      */
450     protected boolean dropFontTags;
451 
452     /**
453      * discard proprietary attributes.
454      */
455     protected boolean dropProprietaryAttributes;
456 
457     /**
458      * discard empty p elements.
459      */
460     protected boolean dropEmptyParas = true;
461 
462     /**
463      * fix comments with adjacent hyphens.
464      */
465     protected boolean fixComments = true;
466 
467     /**
468      * trim empty elements.
469      */
470     protected boolean trimEmpty = true;
471 
472     /**
473      * o/p newline before br or not?
474      */
475     protected boolean breakBeforeBR;
476 
477     /**
478      * create slides on each h2 element.
479      */
480     protected boolean burstSlides;
481 
482     /**
483      * use numeric entities.
484      */
485     protected boolean numEntities;
486 
487     /**
488      * output " marks as &quot;.
489      */
490     protected boolean quoteMarks;
491 
492     /**
493      * output non-breaking space as entity.
494      */
495     protected boolean quoteNbsp = true;
496 
497     /**
498      * output naked ampersand as &amp;.
499      */
500     protected boolean quoteAmpersand = true;
501 
502     /**
503      * wrap within attribute values.
504      */
505     protected boolean wrapAttVals;
506 
507     /**
508      * wrap within JavaScript string literals.
509      */
510     protected boolean wrapScriptlets;
511 
512     /**
513      * wrap within CDATA section tags.
514      */
515     protected boolean wrapSection = true;
516 
517     /**
518      * wrap within ASP pseudo elements.
519      */
520     protected boolean wrapAsp = true;
521 
522     /**
523      * wrap within JSTE pseudo elements.
524      */
525     protected boolean wrapJste = true;
526 
527     /**
528      * wrap within PHP pseudo elements.
529      */
530     protected boolean wrapPhp = true;
531 
532     /**
533      * fix URLs by replacing \ with /.
534      */
535     protected boolean fixBackslash = true;
536 
537     /**
538      * newline+indent before each attribute.
539      */
540     protected boolean indentAttributes;
541 
542     /**
543      * If set to yes PIs must end with <code>?&gt;</code>.
544      */
545     protected boolean xmlPIs;
546 
547     /**
548      * if set to yes adds xml:space attr as needed.
549      */
550     protected boolean xmlSpace;
551 
552     /**
553      * if yes text at body is wrapped in p's.
554      */
555     protected boolean encloseBodyText;
556 
557     /**
558      * if yes text in blocks is wrapped in p's.
559      */
560     protected boolean encloseBlockText;
561 
562     /**
563      * if yes last modified time is preserved.
564      */
565     protected boolean keepFileTimes = true;
566 
567     /**
568      * draconian cleaning for Word2000.
569      */
570     protected boolean word2000;
571 
572     /**
573      * add meta element indicating tidied doc.
574      */
575     protected boolean tidyMark = true;
576 
577     /**
578      * if true format error output for GNU Emacs.
579      */
580     protected boolean emacs;
581 
582     /**
583      * if true attributes may use newlines.
584      */
585     protected boolean literalAttribs;
586 
587     /**
588      * output BODY content only.
589      */
590     protected boolean bodyOnly;
591 
592     /**
593      * properly escape URLs.
594      */
595     protected boolean fixUri = true;
596 
597     /**
598      * folds known attribute values to lower case.
599      */
600     protected boolean lowerLiterals = true;
601 
602     /**
603      * replace hex color attribute values with names.
604      */
605     protected boolean replaceColor;
606 
607     /**
608      * hides all (real) comments in output.
609      */
610     protected boolean hideComments;
611 
612     /**
613      * indent CDATA sections.
614      */
615     protected boolean indentCdata;
616 
617     /**
618      * output document even if errors were found.
619      */
620     protected boolean forceOutput;
621 
622     /**
623      * number of errors to put out.
624      */
625     protected int showErrors = 6;
626 
627     /**
628      * convert quotes and dashes to nearest ASCII char.
629      */
630     protected boolean asciiChars = true;
631 
632     /**
633      * join multiple class attributes.
634      */
635     protected boolean joinClasses;
636 
637     /**
638      * join multiple style attributes.
639      */
640     protected boolean joinStyles = true;
641 
642     /**
643      * replace CDATA sections with escaped text.
644      */
645     protected boolean escapeCdata = true;
646 
647     /**
648      * allow numeric character references.
649      */
650     protected boolean ncr = true; // #431953
651 
652     /**
653      * CSS class naming for -clean option.
654      */
655     protected String cssPrefix;
656 
657     /**
658      * char encoding used when replacing illegal SGML chars, regardless of specified encoding.
659      */
660     protected String replacementCharEncoding = "WIN1252"; // by default
661 
662     /**
663      * TagTable associated with this Configuration.
664      */
665     protected TagTable tt;
666 
667     /**
668      * Report instance. Used for messages.
669      */
670     protected Report report;
671 
672     /**
673      * track what types of tags user has defined to eliminate unnecessary searches.
674      */
675     protected int definedTags;
676 
677     /**
678      * characters for the newline marker.
679      */
680     protected char[] newline = (System.getProperty("line.separator")).toCharArray();
681 
682     /**
683      * Input character encoding (defaults to "ISO8859_1").
684      */
685     private String inCharEncoding = "ISO8859_1";
686 
687     /**
688      * Output character encoding (defaults to "ASCII").
689      */
690     private String outCharEncoding = "ASCII";
691 
692     /**
693      * Avoid mapping values > 127 to entities.
694      */
695     protected boolean rawOut;
696 
697     /**
698      * configuration properties.
699      */
700     private transient Properties properties = new Properties();
701 
702     /**
703      * Instantiates a new Configuration. This method should be called by Tidy only.
704      * @param report Report instance
705      */
706     protected Configuration(Report report)
707     {
708         this.report = report;
709     }
710 
711     /**
712      * adds a config option to the map.
713      * @param flag configuration options added
714      */
715     private static void addConfigOption(Flag flag)
716     {
717         OPTIONS.put(flag.getName(), flag);
718     }
719 
720     /**
721      * adds configuration Properties.
722      * @param p Properties
723      */
724     public void addProps(Properties p)
725     {
726         Enumeration propEnum = p.propertyNames();
727         while (propEnum.hasMoreElements())
728         {
729             String key = (String) propEnum.nextElement();
730             String value = p.getProperty(key);
731             properties.put(key, value);
732         }
733         parseProps();
734     }
735 
736     /**
737      * Parses a property file.
738      * @param filename file name
739      */
740     public void parseFile(String filename)
741     {
742         try
743         {
744             properties.load(new FileInputStream(filename));
745         }
746         catch (IOException e)
747         {
748             System.err.println(filename + " " + e.toString());
749             return;
750         }
751         parseProps();
752     }
753 
754     /**
755      * Is the given String a valid configuration flag?
756      * @param name configuration parameter name
757      * @return <code>true</code> if the given String is a valid config option
758      */
759     public static boolean isKnownOption(String name)
760     {
761         return name != null && OPTIONS.containsKey(name);
762     }
763 
764     /**
765      * Parses the configuration properties file.
766      */
767     private void parseProps()
768     {
769         Iterator iterator = properties.keySet().iterator();
770 
771         while (iterator.hasNext())
772         {
773             String key = (String) iterator.next();
774             Flag flag = (Flag) OPTIONS.get(key);
775             if (flag == null)
776             {
777                 report.unknownOption(key);
778                 continue;
779             }
780 
781             String stringValue = properties.getProperty(key);
782             Object value = flag.getParser().parse(stringValue, key, this);
783             if (flag.getLocation() != null)
784             {
785                 try
786                 {
787                     flag.getLocation().set(this, value);
788                 }
789                 catch (IllegalArgumentException e)
790                 {
791                     throw new RuntimeException("IllegalArgumentException during config initialization for field "
792                         + key
793                         + "with value ["
794                         + value
795                         + "]: "
796                         + e.getMessage());
797                 }
798                 catch (IllegalAccessException e)
799                 {
800                     throw new RuntimeException("IllegalArgumentException during config initialization for field "
801                         + key
802                         + "with value ["
803                         + value
804                         + "]: "
805                         + e.getMessage());
806                 }
807             }
808         }
809     }
810 
811     /**
812      * Ensure that config is self consistent.
813      */
814     public void adjust()
815     {
816         if (encloseBlockText)
817         {
818             encloseBodyText = true;
819         }
820 
821         // avoid the need to set IndentContent when SmartIndent is set
822         if (smartIndent)
823         {
824             indentContent = true;
825         }
826 
827         // disable wrapping
828         if (wraplen == 0)
829         {
830             wraplen = 0x7FFFFFFF;
831         }
832 
833         // Word 2000 needs o:p to be declared as inline
834         if (word2000)
835         {
836             definedTags |= Dict.TAGTYPE_INLINE;
837             tt.defineTag(Dict.TAGTYPE_INLINE, "o:p");
838         }
839 
840         // #480701 disable XHTML output flag if both output-xhtml and xml are set
841         if (xmlTags)
842         {
843             xHTML = false;
844         }
845 
846         // XHTML is written in lower case
847         if (xHTML)
848         {
849             xmlOut = true;
850             upperCaseTags = false;
851             upperCaseAttrs = false;
852         }
853 
854         // if XML in, then XML out
855         if (xmlTags)
856         {
857             xmlOut = true;
858             xmlPIs = true;
859         }
860 
861         // #427837 - fix by Dave Raggett 02 Jun 01
862         // generate <?xml version="1.0" encoding="iso-8859-1"?> if the output character encoding is Latin-1 etc.
863         if (!"UTF8".equals(getOutCharEncodingName()) && !"ASCII".equals(getOutCharEncodingName()) && xmlOut)
864         {
865             xmlPi = true;
866         }
867 
868         // XML requires end tags
869         if (xmlOut)
870         {
871             quoteAmpersand = true;
872             hideEndTags = false;
873         }
874     }
875 
876     /**
877      * prints available configuration options.
878      * @param errout where to write
879      * @param showActualConfiguration print actual configuration values
880      */
881     void printConfigOptions(Writer errout, boolean showActualConfiguration)
882     {
883         String pad = "                                                                               ";
884         try
885         {
886             errout.write("\nConfiguration File Settings:\n\n");
887 
888             if (showActualConfiguration)
889             {
890                 errout.write("Name                        Type       Current Value\n");
891             }
892             else
893             {
894                 errout.write("Name                        Type       Allowable values\n");
895             }
896 
897             errout.write("=========================== =========  ========================================\n");
898 
899             Flag configItem;
900 
901             // sort configuration options
902             List values = new ArrayList(OPTIONS.values());
903             Collections.sort(values);
904 
905             Iterator iterator = values.iterator();
906 
907             while (iterator.hasNext())
908             {
909                 configItem = (Flag) iterator.next();
910 
911                 errout.write(configItem.getName());
912                 errout.write(pad, 0, 28 - configItem.getName().length());
913 
914                 errout.write(configItem.getParser().getType());
915                 errout.write(pad, 0, 11 - configItem.getParser().getType().length());
916 
917                 if (showActualConfiguration)
918                 {
919                     Field field = configItem.getLocation();
920                     Object actualValue = null;
921 
922                     if (field != null)
923                     {
924                         try
925                         {
926                             actualValue = field.get(this);
927                         }
928                         catch (IllegalArgumentException e1)
929                         {
930                             // should never happen
931                             throw new RuntimeException("IllegalArgument when reading field " + field.getName());
932                         }
933                         catch (IllegalAccessException e1)
934                         {
935                             // should never happen
936                             throw new RuntimeException("IllegalAccess when reading field " + field.getName());
937                         }
938                     }
939 
940                     errout.write(configItem.getParser().getFriendlyName(configItem.getName(), actualValue, this));
941                 }
942                 else
943                 {
944                     errout.write(configItem.getParser().getOptionValues());
945                 }
946 
947                 errout.write("\n");
948 
949             }
950             errout.flush();
951         }
952         catch (IOException e)
953         {
954             throw new RuntimeException(e.getMessage());
955         }
956 
957     }
958 
959     /**
960      * A configuration option.
961      */
962     static class Flag implements Comparable
963     {
964 
965         /**
966          * option name.
967          */
968         private String name;
969 
970         /**
971          * field name.
972          */
973         private String fieldName;
974 
975         /**
976          * Field where the evaluated value is saved.
977          */
978         private Field location;
979 
980         /**
981          * Parser for the configuration property.
982          */
983         private ParseProperty parser;
984 
985         /**
986          * Instantiates a new Flag.
987          * @param name option name
988          * @param fieldName field name (can be null)
989          * @param parser parser for property
990          */
991         Flag(String name, String fieldName, ParseProperty parser)
992         {
993 
994             this.fieldName = fieldName;
995             this.name = name;
996             this.parser = parser;
997         }
998 
999         /**
1000          * Getter for <code>location</code>.
1001          * @return Returns the location.
1002          */
1003         public Field getLocation()
1004         {
1005             // lazy initialization to speed up loading
1006             if (fieldName != null && this.location == null)
1007             {
1008                 try
1009                 {
1010                     this.location = Configuration.class.getDeclaredField(fieldName);
1011                 }
1012                 catch (NoSuchFieldException e)
1013                 {
1014                     throw new RuntimeException("NoSuchField exception during config initialization for field "
1015                         + fieldName);
1016                 }
1017                 catch (SecurityException e)
1018                 {
1019                     throw new RuntimeException("Security exception during config initialization for field "
1020                         + fieldName
1021                         + ": "
1022                         + e.getMessage());
1023                 }
1024             }
1025 
1026             return this.location;
1027         }
1028 
1029         /**
1030          * Getter for <code>name</code>.
1031          * @return Returns the name.
1032          */
1033         public String getName()
1034         {
1035             return this.name;
1036         }
1037 
1038         /**
1039          * Getter for <code>parser</code>.
1040          * @return Returns the parser.
1041          */
1042         public ParseProperty getParser()
1043         {
1044             return this.parser;
1045         }
1046 
1047         /**
1048          * @see java.lang.Object#equals(java.lang.Object)
1049          */
1050         public boolean equals(Object obj)
1051         {
1052             return this.name.equals(((Flag) obj).name);
1053         }
1054 
1055         /**
1056          * @see java.lang.Object#hashCode()
1057          */
1058         public int hashCode()
1059         {
1060             // returning the hashCode of String, to be consistent with equals and compareTo
1061             return this.name.hashCode();
1062         }
1063 
1064         /**
1065          * @see java.lang.Comparable#compareTo(java.lang.Object)
1066          */
1067         public int compareTo(Object o)
1068         {
1069             return this.name.compareTo(((Flag) o).name);
1070         }
1071 
1072     }
1073 
1074     /**
1075      * Getter for <code>inCharEncodingName</code>.
1076      * @return Returns the inCharEncodingName.
1077      */
1078     protected String getInCharEncodingName()
1079     {
1080         return this.inCharEncoding;
1081     }
1082 
1083     /**
1084      * Setter for <code>inCharEncodingName</code>.
1085      * @param encoding The inCharEncodingName to set.
1086      */
1087     protected void setInCharEncodingName(String encoding)
1088     {
1089         String javaEncoding = EncodingNameMapper.toJava(encoding);
1090         if (javaEncoding != null)
1091         {
1092             this.inCharEncoding = javaEncoding;
1093         }
1094     }
1095 
1096     /**
1097      * Getter for <code>outCharEncodingName</code>.
1098      * @return Returns the outCharEncodingName.
1099      */
1100     protected String getOutCharEncodingName()
1101     {
1102         return this.outCharEncoding;
1103     }
1104 
1105     /**
1106      * Setter for <code>outCharEncodingName</code>.
1107      * @param encoding The outCharEncodingName to set.
1108      */
1109     protected void setOutCharEncodingName(String encoding)
1110     {
1111         String javaEncoding = EncodingNameMapper.toJava(encoding);
1112         if (javaEncoding != null)
1113         {
1114             this.outCharEncoding = javaEncoding;
1115         }
1116     }
1117 
1118     /**
1119      * Setter for <code>inOutCharEncodingName</code>.
1120      * @param encoding The CharEncodingName to set.
1121      */
1122     protected void setInOutEncodingName(String encoding)
1123     {
1124         setInCharEncodingName(encoding);
1125         setOutCharEncodingName(encoding);
1126     }
1127 
1128     /**
1129      * Setter for <code>outCharEncoding</code>.
1130      * @param encoding The outCharEncoding to set.
1131      * @deprecated use setOutCharEncodingName(String)
1132      */
1133     protected void setOutCharEncoding(int encoding)
1134     {
1135         setOutCharEncodingName(convertCharEncoding(encoding));
1136     }
1137 
1138     /**
1139      * Setter for <code>inCharEncoding</code>.
1140      * @param encoding The inCharEncoding to set.
1141      * @deprecated use setInCharEncodingName(String)
1142      */
1143     protected void setInCharEncoding(int encoding)
1144     {
1145         setInCharEncodingName(convertCharEncoding(encoding));
1146     }
1147 
1148     /**
1149      * Convert a char encoding from the deprecated tidy constant to a standard java encoding name.
1150      * @param code encoding code
1151      * @return encoding name
1152      */
1153     protected String convertCharEncoding(int code)
1154     {
1155         if (code != 0 && code < ENCODING_NAMES.length)
1156         {
1157             return ENCODING_NAMES[code];
1158         }
1159         return null;
1160     }
1161 
1162 }