View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy.ant;
55  
56  import java.io.BufferedInputStream;
57  import java.io.BufferedOutputStream;
58  import java.io.ByteArrayOutputStream;
59  import java.io.File;
60  import java.io.FileInputStream;
61  import java.io.FileOutputStream;
62  import java.io.IOException;
63  import java.io.InputStream;
64  import java.io.OutputStream;
65  import java.io.PrintWriter;
66  import java.util.ArrayList;
67  import java.util.Iterator;
68  import java.util.List;
69  import java.util.Properties;
70  
71  import org.apache.tools.ant.BuildException;
72  import org.apache.tools.ant.DirectoryScanner;
73  import org.apache.tools.ant.Project;
74  import org.apache.tools.ant.Task;
75  import org.apache.tools.ant.types.FileSet;
76  import org.apache.tools.ant.types.Parameter;
77  import org.apache.tools.ant.util.FileNameMapper;
78  import org.apache.tools.ant.util.FlatFileNameMapper;
79  import org.apache.tools.ant.util.IdentityMapper;
80  import org.w3c.tidy.Tidy;
81  
82  
83  /**
84   * JTidy ant task.
85   * <h3>Parameters</h3>
86   * <table cellspacing="0" border="1"> <thead>
87   * <tr>
88   * <th>Attribute</th>
89   * <th>Description</th>
90   * <th>Required</th>
91   * </tr>
92   * </thead> <tbody>
93   * <tr>
94   * <td>srcfile</td>
95   * <td>source file</td>
96   * <td>Yes, unless a nested <code>&lt;fileset></code> element is used.</td>
97   * </tr>
98   * <tr>
99   * <td>destfile</td>
100  * <td>destination file for output</td>
101  * <td rowspan="2">With the <code>srcfile</code> attribute, either <code>destfile</code> or <code>destdir</code>
102  * can be used. With nested <code>&lt;fileset></code> elements only <code>destdir</code> is allowed.</td>
103  * </tr>
104  * <tr>
105  * <td>destdir</td>
106  * <td>destination directory for output</td>
107  * </tr>
108  * <tr>
109  * <td>properties</td>
110  * <td>Path to a valid tidy properties file</td>
111  * <td>No</td>
112  * </tr>
113  * <tr>
114  * <td>flatten</td>
115  * <td>Ignore the directory structure of the source files, and copy all files into the directory specified by the
116  * <code>destdir</code> attribute.</td>
117  * <td>No; defaults to false.</td>
118  * </tr>
119  * <tr>
120  * <td>failonerror</td>
121  * <td>boolean to control whether failure to execute should throw a BuildException or just print an error. If set to
122  * <code>true</code> errors in input files which tidy is unable to fix will cause a failure.</td>
123  * <td>No; defaults to false.</td>
124  * </tr>
125  * </tbody> </table>
126  * <h3>Nested elements</h3>
127  * <ul>
128  * <li><strong>Fileset </strong>: if you need to run tidy on more than one file, you can specify nested filesets.</li>
129  * <li><strong>Parameter </strong>: you can specify any tidy configuration option directly using a nested
130  * <code>parameter</code> element.</li>
131  * </ul>
132  * <h3>Setup</h3>
133  * <p>
134  * Add the following <code>taskdef</code> to set up the JTidy task in your build.xml:
135  * </p>
136  * 
137  * <pre>
138  * &lt;taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask"/>
139  * </pre>
140  * 
141  * <p>
142  * This will work if JTidy jar is copied to ant lib directory. If you need to reference the jar elsewhere on the
143  * filesystem you can add a nested classpath element:
144  * </p>
145  * 
146  * <pre>
147  * &lt;taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask">
148  *   &lt;classpath>
149  *     &lt;pathelement location="${lib.dir}/jtidy.jar"/>
150  *   &lt;/classpath>
151  * &lt;/taskdef>
152  * </pre>
153  * 
154  * <h3>Examples</h3>
155  * 
156  * <pre>
157  * &lt;tidy destdir="out" properties="/path/to/tidy.properties">
158  *   &lt;fileset dir="inputdir" />
159  *   &lt;parameter name="drop-font-tags" value="true" />
160  * &lt;/tidy>
161  * </pre>
162  * 
163  * @author Fabrizio Giustina
164  * @version $Revision: 807 $ ($Author: fgiust $)
165  */
166 public class JTidyTask extends Task
167 {
168 
169     /**
170      * Filesets.
171      */
172     private List filesets = new ArrayList();
173 
174     /**
175      * Destination directory for output.
176      */
177     private File destdir;
178 
179     /**
180      * Destination file for output.
181      */
182     private File destfile;
183 
184     /**
185      * Source file.
186      */
187     private File srcfile;
188 
189     /**
190      * Control whether failure to execute should throw a BuildException.
191      */
192     private boolean failonerror;
193 
194     /**
195      * Don't output directories.
196      */
197     private boolean flatten;
198 
199     /**
200      * tidy instance.
201      */
202     private Tidy tidy;
203 
204     /**
205      * Configured properties.
206      */
207     private Properties props;
208 
209     /**
210      * Properties file.
211      */
212     private File properties;
213 
214     /**
215      * @param destdir The destdir to set.
216      */
217     public void setDestdir(File destdir)
218     {
219         this.destdir = destdir;
220     }
221 
222     /**
223      * @param destfile The destfile to set.
224      */
225     public void setDestfile(File destfile)
226     {
227         this.destfile = destfile;
228     }
229 
230     /**
231      * @param srcfile The srcfile to set.
232      */
233     public void setSrcfile(File srcfile)
234     {
235         this.srcfile = srcfile;
236     }
237 
238     /**
239      * @param failonerror The failonerror to set.
240      */
241     public void setFailonerror(boolean failonerror)
242     {
243         this.failonerror = failonerror;
244     }
245 
246     /**
247      * @param flatten The flatten to set.
248      */
249     public void setFlatten(boolean flatten)
250     {
251         this.flatten = flatten;
252     }
253 
254     /**
255      * @param properties The properties to set.
256      */
257     public void setProperties(File properties)
258     {
259         this.properties = properties;
260     }
261 
262     /**
263      * Adds a fileset to be processed Fileset
264      * @param fileSet
265      */
266     public void addFileset(FileSet fileSet)
267     {
268         filesets.add(fileSet);
269     }
270 
271     /**
272      * Setter method for any property using the ant type Parameter.
273      * @param prop Ant type Parameter
274      */
275     public void addConfiguredParameter(Parameter prop)
276     {
277         props.setProperty(prop.getName(), prop.getValue());
278     }
279 
280     /**
281      * Initializes the task.
282      */
283     public void init()
284     {
285         super.init();
286 
287         // Setup a Tidy instance
288         tidy = new Tidy();
289         props = new Properties();
290     }
291 
292     /**
293      * Validates task parameters.
294      * @throws BuildException if any invalid parameter is found
295      */
296     protected void validateParameters() throws BuildException
297     {
298         if (srcfile == null && filesets.size() == 0)
299         {
300             throw new BuildException("Specify at least srcfile or a fileset.");
301         }
302         if (srcfile != null && filesets.size() > 0)
303         {
304             throw new BuildException("You can't specify both srcfile and nested filesets.");
305         }
306 
307         if (destfile == null && destdir == null)
308         {
309             throw new BuildException("One of destfile or destdir must be set.");
310         }
311 
312         if (srcfile == null && destfile != null)
313         {
314             throw new BuildException("You only can use destfile with srcfile.");
315         }
316 
317         if (srcfile != null && srcfile.isDirectory())
318         {
319             throw new BuildException("srcfile can't be a directory.");
320         }
321 
322         if (properties != null && properties.isDirectory())
323         {
324             throw new BuildException("Invalid properties file specified: " + properties.getPath());
325         }
326 
327     }
328 
329     /**
330      * Run the task.
331      * @exception BuildException The exception raised during task execution.
332      */
333     public void execute() throws BuildException
334     {
335         // validate
336         validateParameters();
337 
338         // load configuration
339         if (this.properties != null)
340         {
341             try
342             {
343                 this.props.load(new FileInputStream(this.properties));
344             }
345             catch (IOException e)
346             {
347                 throw new BuildException("Unable to load properties file " + properties, e);
348             }
349         }
350 
351         // hide output unless set in properties
352         tidy.setErrout(new PrintWriter(new ByteArrayOutputStream()));
353 
354         tidy.setConfigurationFromProps(props);
355 
356         if (this.srcfile != null)
357         {
358             // process a single file
359             executeSingle();
360         }
361         else
362         {
363             // process filesets
364             executeSet();
365         }
366     }
367 
368     /**
369      * A single file has been specified.
370      */
371     protected void executeSingle()
372     {
373 
374         if (!srcfile.exists())
375         {
376             throw new BuildException("Could not find source file " + srcfile.getAbsolutePath() + ".");
377         }
378 
379         if (destfile == null)
380         {
381             // destdir can't be null, condition is checked in validateParameters()
382             destfile = new File(destdir, srcfile.getName());
383         }
384 
385         processFile(srcfile, destfile);
386     }
387 
388     /**
389      * Run tidy on filesets.
390      */
391     protected void executeSet()
392     {
393 
394         FileNameMapper mapper = null;
395         if (flatten)
396         {
397             mapper = new FlatFileNameMapper();
398         }
399         else
400         {
401             mapper = new IdentityMapper();
402         }
403 
404         mapper.setTo(this.destdir.getAbsolutePath());
405 
406         Iterator iterator = filesets.iterator();
407         while (iterator.hasNext())
408         {
409             FileSet fileSet = (FileSet) iterator.next();
410             DirectoryScanner directoryScanner = fileSet.getDirectoryScanner(getProject());
411             String[] sourceFiles = directoryScanner.getIncludedFiles();
412             File inputdir = directoryScanner.getBasedir();
413 
414             mapper.setFrom(inputdir.getAbsolutePath());
415 
416             for (int j = 0; j < sourceFiles.length; j++)
417             {
418                 String[] mapped = mapper.mapFileName(sourceFiles[j]);
419 
420                 processFile(new File(inputdir, sourceFiles[j]), new File(this.destdir, mapped[0]));
421             }
422         }
423     }
424 
425     /**
426      * Run tidy on a file.
427      * @param inputFile input file
428      * @param outputFile output file
429      */
430     protected void processFile(File inputFile, File outputFile)
431     {
432 
433         log("Processing " + inputFile.getAbsolutePath(), Project.MSG_DEBUG);
434 
435         InputStream is;
436         OutputStream os;
437         try
438         {
439             is = new BufferedInputStream(new FileInputStream(inputFile));
440         }
441         catch (IOException e)
442         {
443             throw new BuildException("Unable to open file " + inputFile);
444         }
445 
446         try
447         {
448             outputFile.getParentFile().mkdirs();
449             outputFile.createNewFile();
450             os = new BufferedOutputStream(new FileOutputStream(outputFile));
451         }
452         catch (IOException e)
453         {
454             throw new BuildException("Unable to open destination file " + outputFile, e);
455         }
456 
457         tidy.parse(is, os);
458 
459         try
460         {
461             is.close();
462         }
463         catch (IOException e1)
464         {
465             // ignore
466         }
467         try
468         {
469             os.flush();
470             os.close();
471         }
472         catch (IOException e1)
473         {
474             // ignore
475         }
476 
477         // cleanup empty files
478         if (tidy.getParseErrors() > 0 && !tidy.getForceOutput())
479         {
480             outputFile.delete();
481         }
482 
483         if (failonerror && tidy.getParseErrors() > 0)
484         {
485             throw new BuildException("Tidy was unable to process file "
486                 + inputFile
487                 + ", "
488                 + tidy.getParseErrors()
489                 + " returned.");
490         }
491 
492     }
493 }