View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy.ant;
55  
56  import java.io.BufferedInputStream;
57  import java.io.BufferedOutputStream;
58  import java.io.ByteArrayOutputStream;
59  import java.io.File;
60  import java.io.FileInputStream;
61  import java.io.FileNotFoundException;
62  import java.io.FileOutputStream;
63  import java.io.IOException;
64  import java.io.InputStream;
65  import java.io.OutputStream;
66  import java.io.PrintWriter;
67  import java.util.ArrayList;
68  import java.util.Iterator;
69  import java.util.List;
70  import java.util.Properties;
71  
72  import org.apache.tools.ant.BuildException;
73  import org.apache.tools.ant.DirectoryScanner;
74  import org.apache.tools.ant.Project;
75  import org.apache.tools.ant.Task;
76  import org.apache.tools.ant.types.FileSet;
77  import org.apache.tools.ant.types.Parameter;
78  import org.apache.tools.ant.util.FileNameMapper;
79  import org.apache.tools.ant.util.FlatFileNameMapper;
80  import org.apache.tools.ant.util.IdentityMapper;
81  import org.w3c.tidy.Tidy;
82  
83  
84  /***
85   * JTidy ant task.
86   * <h3>Parameters</h3>
87   * <table cellspacing="0" border="1"> <thead>
88   * <tr>
89   * <th>Attribute</th>
90   * <th>Description</th>
91   * <th>Required</th>
92   * </tr>
93   * </thead> <tbody>
94   * <tr>
95   * <td>srcfile</td>
96   * <td>source file</td>
97   * <td>Yes, unless a nested <code>&lt;fileset></code> element is used.</td>
98   * </tr>
99   * <tr>
100  * <td>destfile</td>
101  * <td>destination file for output</td>
102  * <td rowspan="2">With the <code>srcfile</code> attribute, either <code>destfile</code> or <code>destdir</code>
103  * can be used. With nested <code>&lt;fileset></code> elements only <code>destdir</code> is allowed.</td>
104  * </tr>
105  * <tr>
106  * <td>destdir</td>
107  * <td>destination directory for output</td>
108  * </tr>
109  * <tr>
110  * <td>properties</td>
111  * <td>Path to a valid tidy properties file</td>
112  * <td>No</td>
113  * </tr>
114  * <tr>
115  * <td>flatten</td>
116  * <td>Ignore the directory structure of the source files, and copy all files into the directory specified by the
117  * <code>destdir</code> attribute.</td>
118  * <td>No; defaults to false.</td>
119  * </tr>
120  * <tr>
121  * <td>failonerror</td>
122  * <td>boolean to control whether failure to execute should throw a BuildException or just print an error. If set to
123  * <code>true</code>, errors in input files which tidy is unable to fix will cause a failure.</td>
124  * <td>No; defaults to false.</td>
125  * </tr>
126  * </tbody> </table>
127  * <h3>Nested elements</h3>
128  * <ul>
129  * <li><strong>Fileset </strong>: if you need to run tidy on more than one file, you can specify nested filesets.</li>
130  * <li><strong>Parameter </strong>: you can specify any tidy configuration option directly using a nested
131  * <code>parameter</code> element.</li>
132  * </ul>
133  * <h3>Setup</h3>
134  * <p>
135  * Add the following <code>taskdef</code> to set up the JTidy task in your build.xml:
136  * </p>
137  * 
138  * <pre>
139  * &lt;taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask"/>
140  * </pre>
141  * 
142  * <p>
143  * This will work if JTidy jar is copied to ant lib directory. If you need to reference the jar elsewhere on the
144  * filesystem you can add a nested classpath element:
145  * </p>
146  * 
147  * <pre>
148  * &lt;taskdef name="tidy" classname="org.w3c.tidy.ant.JTidyTask">
149  *   &lt;classpath>
150  *     &lt;pathelement location="${lib.dir}/jtidy.jar"/>
151  *   &lt;/classpath>
152  * &lt;/taskdef>
153  * </pre>
154  * 
155  * <h3>Examples</h3>
156  * 
157  * <pre>
158  * &lt;tidy destdir="out" properties="/path/to/tidy.properties">
159  *   &lt;fileset dir="inputdir" />
160  *   &lt;parameter name="drop-font-tags" value="true" />
161  * &lt;/tidy>
162  * </pre>
163  * 
164  * @author Fabrizio Giustina
165  * @version $Revision: 1.8 $ ($Author: fgiust $)
166  */
167 public class JTidyTask extends Task
168 {
169 
170     /***
171      * Filesets.
172      */
173     private List filesets = new ArrayList();
174 
175     /***
176      * Destination directory for output.
177      */
178     private File destdir;
179 
180     /***
181      * Destination file for output.
182      */
183     private File destfile;
184 
185     /***
186      * Source file.
187      */
188     private File srcfile;
189 
190     /***
191      * Control whether failure to execute should throw a BuildException.
192      */
193     private boolean failonerror;
194 
195     /***
196      * Don't output directories.
197      */
198     private boolean flatten;
199 
200     /***
201      * tidy instance.
202      */
203     private Tidy tidy;
204 
205     /***
206      * Configured properties.
207      */
208     private Properties props;
209 
210     /***
211      * Properties file.
212      */
213     private File properties;
214 
215     /***
216      * @param destdir The destdir to set.
217      */
218     public void setDestdir(File destdir)
219     {
220         this.destdir = destdir;
221     }
222 
223     /***
224      * @param destfile The destfile to set.
225      */
226     public void setDestfile(File destfile)
227     {
228         this.destfile = destfile;
229     }
230 
231     /***
232      * @param srcfile The srcfile to set.
233      */
234     public void setSrcfile(File srcfile)
235     {
236         this.srcfile = srcfile;
237     }
238 
239     /***
240      * @param failonerror The failonerror to set.
241      */
242     public void setFailonerror(boolean failonerror)
243     {
244         this.failonerror = failonerror;
245     }
246 
247     /***
248      * @param flatten The flatten to set.
249      */
250     public void setFlatten(boolean flatten)
251     {
252         this.flatten = flatten;
253     }
254 
255     /***
256      * @param properties The properties to set.
257      */
258     public void setProperties(File properties)
259     {
260         this.properties = properties;
261     }
262 
263     /***
264      * Adds a fileset to be processed Fileset
265      * @param fileSet
266      */
267     public void addFileset(FileSet fileSet)
268     {
269         filesets.add(fileSet);
270     }
271 
272     /***
273      * Setter method for any property using the ant type Parameter.
274      * @param prop Ant type Parameter
275      */
276     public void addConfiguredParameter(Parameter prop)
277     {
278         props.setProperty(prop.getName(), prop.getValue());
279     }
280 
281     /***
282      * Initializes the task.
283      */
284     public void init()
285     {
286         super.init();
287 
288         // Setup a Tidy instance
289         tidy = new Tidy();
290         props = new Properties();
291     }
292 
293     /***
294      * Validates task parameters.
295      * @throws BuildException if any invalid parameter is found
296      */
297     protected void validateParameters() throws BuildException
298     {
299         if (srcfile == null && filesets.size() == 0)
300         {
301             throw new BuildException("Specify at least srcfile or a fileset.");
302         }
303         if (srcfile != null && filesets.size() > 0)
304         {
305             throw new BuildException("You can't specify both srcfile and nested filesets.");
306         }
307 
308         if (destfile == null && destdir == null)
309         {
310             throw new BuildException("One of destfile or destdir must be set.");
311         }
312 
313         if (srcfile == null && destfile != null)
314         {
315             throw new BuildException("You only can use destfile with srcfile.");
316         }
317 
318         if (srcfile != null && srcfile.exists() && srcfile.isDirectory())
319         {
320             throw new BuildException("srcfile can't be a directory.");
321         }
322 
323         if (properties != null && (!properties.exists() || properties.isDirectory()))
324         {
325             throw new BuildException("Invalid properties file specified: " + properties.getPath());
326         }
327 
328     }
329 
330     /***
331      * Run the task.
332      * @exception BuildException The exception raised during task execution.
333      */
334     public void execute() throws BuildException
335     {
336         // validate
337         validateParameters();
338 
339         // load configuration
340         if (this.properties != null)
341         {
342             try
343             {
344                 this.props.load(new FileInputStream(this.properties));
345             }
346             catch (IOException e)
347             {
348                 throw new BuildException("Unable to load properties file " + properties, e);
349             }
350         }
351 
352         // hide output unless set in properties
353         tidy.setErrout(new PrintWriter(new ByteArrayOutputStream()));
354 
355         tidy.setConfigurationFromProps(props);
356 
357         if (this.srcfile != null)
358         {
359             // process a single file
360             executeSingle();
361         }
362         else
363         {
364             // process filesets
365             executeSet();
366         }
367     }
368 
369     /***
370      * A single file has been specified.
371      */
372     protected void executeSingle()
373     {
374 
375         if (!srcfile.exists())
376         {
377             throw new BuildException("Could not find source file " + srcfile.getAbsolutePath() + ".");
378         }
379 
380         if (destfile == null)
381         {
382             // destdir can't be null, condition is checked in validateParameters()
383             destfile = new File(destdir, srcfile.getName());
384         }
385 
386         processFile(srcfile, destfile);
387     }
388 
389     /***
390      * Run tidy on filesets.
391      */
392     protected void executeSet()
393     {
394 
395         FileNameMapper mapper = null;
396         if (flatten)
397         {
398             mapper = new FlatFileNameMapper();
399         }
400         else
401         {
402             mapper = new IdentityMapper();
403         }
404 
405         mapper.setTo(this.destdir.getAbsolutePath());
406 
407         Iterator iterator = filesets.iterator();
408         while (iterator.hasNext())
409         {
410             FileSet fileSet = (FileSet) iterator.next();
411             DirectoryScanner directoryScanner = fileSet.getDirectoryScanner(getProject());
412             String[] sourceFiles = directoryScanner.getIncludedFiles();
413             File inputdir = directoryScanner.getBasedir();
414 
415             mapper.setFrom(inputdir.getAbsolutePath());
416 
417             for (int j = 0; j < sourceFiles.length; j++)
418             {
419                 String[] mapped = mapper.mapFileName(sourceFiles[j]);
420 
421                 processFile(new File(inputdir, sourceFiles[j]), new File(this.destdir, mapped[0]));
422             }
423         }
424     }
425 
426     /***
427      * Run tidy on a file.
428      * @param inputFile input file
429      * @param outputFile output file
430      */
431     protected void processFile(File inputFile, File outputFile)
432     {
433 
434         log("Processing " + inputFile.getAbsolutePath(), Project.MSG_DEBUG);
435 
436         InputStream is;
437         OutputStream os;
438         try
439         {
440             is = new BufferedInputStream(new FileInputStream(inputFile));
441         }
442         catch (IOException e)
443         {
444             throw new BuildException("Unable to open file " + inputFile);
445         }
446 
447         try
448         {
449             outputFile.getParentFile().mkdirs();
450             outputFile.createNewFile();
451             os = new BufferedOutputStream(new FileOutputStream(outputFile));
452         }
453         catch (IOException e)
454         {
455             throw new BuildException("Unable to open destination file " + outputFile, e);
456         }
457 
458         tidy.parse(is, os);
459 
460         try
461         {
462             is.close();
463         }
464         catch (IOException e1)
465         {
466             // ignore
467         }
468         try
469         {
470             os.flush();
471             os.close();
472         }
473         catch (IOException e1)
474         {
475             // ignore
476         }
477 
478         // cleanup empty files
479         if (tidy.getParseErrors() > 0 && !tidy.getForceOutput())
480         {
481             outputFile.delete();
482         }
483 
484         if (failonerror && tidy.getParseErrors() > 0)
485         {
486             throw new BuildException("Tidy was unable to process file "
487                 + inputFile
488                 + ", "
489                 + tidy.getParseErrors()
490                 + " returned.");
491         }
492 
493     }
494 }