1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
85  
86  
87  /***
88   * @author fgiust
89   * @version $Revision: 1.26 $ ($Author: fgiust $)
90   */
91  public class TidyTestCase extends TestCase
92  {
93  
94      /***
95       * Tidy executable name, if you want to produce output files for comparison.
96       */
97      private static final String TIDY_EXECUTABLE = "tidy.exe";
98  
99      /***
100      * Logger used to enable/disable output file generation using tidy c executable. Setting this logger to
101      * <code>debug</code> in your log4j configuration file will cause the TIDY_EXECUTABLE to be run against the actual
102      * test file. If set to false the command line used to manually run tidy will appear in the log.
103      */
104     private static final Log RUN_TIDY_EXECUTABLE = LogFactory.getLog("runtidy");
105 
106     /***
107      * Tidy test instance.
108      */
109     protected Tidy tidy;
110 
111     /***
112      * message listener.
113      */
114     protected TestMessageListener messageListener;
115 
116     /***
117      * Error out.
118      */
119     protected StringWriter errorLog;
120 
121     /***
122      * Tidy output.
123      */
124     protected String tidyOut;
125 
126     /***
127      * logger.
128      */
129     protected Log log = LogFactory.getLog(getClass());
130 
131     /***
132      * write directly to out. Useful for debugging (but it will make the test fail!).
133      */
134     private boolean writeToOut;
135 
136     /***
137      * Instantiate a new Test case.
138      * @param name test name
139      */
140     public TidyTestCase(String name)
141     {
142         super(name);
143     }
144 
145     /***
146      * @see junit.framework.TestCase#setUp()
147      */
148     protected void setUp() throws Exception
149     {
150         super.setUp();
151 
152         //creates a new Tidy
153         this.tidy = new Tidy();
154     }
155 
156     /***
157      * @see junit.framework.TestCase#tearDown()
158      */
159     protected void tearDown() throws Exception
160     {
161         this.tidy = null;
162         this.errorLog = null;
163         this.tidyOut = null;
164 
165         super.tearDown();
166     }
167 
168     /***
169      * Executes a tidy test. This method simply requires the input file name. If a file with the same name but with a
170      * ".cfg" extension is found is used as configuration file for the test, otherwise the default config will be used.
171      * If a file with the same name, but with the ".out" extension is found, tidy will the result with the content of
172      * such file.
173      * @param fileName input file name
174      * @throws Exception any exception generated during the test
175      */
176     protected void executeTidyTest(String fileName) throws Exception
177     {
178 
179         // set up Tidy using supplied configuration
180         setUpTidy(fileName);
181 
182         // input file
183         URL inputURL = getClass().getClassLoader().getResource(fileName);
184         assertNotNull("Can't find input file [" + fileName + "]", inputURL);
185 
186         OutputStream out;
187         // out
188         if (!writeToOut)
189         {
190             out = new ByteArrayOutputStream();
191         }
192         else
193         {
194             out = System.out;
195         }
196 
197         // go!
198         this.tidy.parse(inputURL.openStream(), out);
199 
200         if (log.isDebugEnabled())
201         {
202             log.debug("out:\n---- out ----\n" + out + "\n---- out ----");
203             log.debug("log:\n---- log ----\n" + this.errorLog + "\n---- log ----");
204         }
205 
206         // existing file for comparison
207         String outFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".out";
208         URL outFile = getClass().getClassLoader().getResource(outFileName);
209 
210         this.tidyOut = out.toString();
211 
212         if (outFile != null)
213         {
214             log.debug("Comparing file using [" + outFileName + "]");
215             assertEquals(this.tidyOut, outFile);
216         }
217 
218         // check messages
219         String messagesFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".msg";
220         URL messagesFile = getClass().getClassLoader().getResource(messagesFileName);
221 
222         // save messages
223         if (messagesFile == null)
224         {
225             if (log.isDebugEnabled())
226             {
227                 log.debug("Messages file doesn't exists, generating [" + messagesFileName + "] for reference");
228             }
229             FileWriter fw = new FileWriter(inputURL.getFile().substring(0, inputURL.getFile().lastIndexOf("."))
230                 + ".msg");
231             fw.write(this.messageListener.messagesToXml());
232             fw.close();
233         }
234         else
235         {
236             // compare result to expected messages
237             if (log.isDebugEnabled())
238             {
239                 log.debug("Comparing messages using [" + messagesFileName + "]");
240             }
241             compareMsgXml(messagesFile);
242         }
243     }
244 
245     /***
246      * Parse an existing msg file and assert that content is identical to current output.
247      * @param messagesFile URL to mesage file
248      * @throws Exception any exception generated during the test
249      */
250     protected void compareMsgXml(URL messagesFile) throws Exception
251     {
252 
253         // first parse existing file
254         // avoid using DOM since if will need forking junit execution in maven (too slow)
255         SAXParserFactory factory = SAXParserFactory.newInstance();
256         SAXParser saxParser = factory.newSAXParser();
257 
258         MsgXmlHandler handler = new MsgXmlHandler();
259         saxParser.parse(new InputSource(messagesFile.openStream()), handler);
260         List expectedMsgs = handler.getMessages();
261 
262         List tidyMsgs = this.messageListener.getReceived();
263 
264         // assert size
265         assertEquals("Number of messages is different from expected", expectedMsgs.size(), tidyMsgs.size());
266 
267         // compare messages
268         Iterator expectedMsgIt = expectedMsgs.iterator();
269         Iterator tidyMsgIt = tidyMsgs.iterator();
270         int count = 0;
271         while (tidyMsgIt.hasNext())
272         {
273             TidyMessage expectedOne = (TidyMessage) expectedMsgIt.next();
274             TidyMessage tidyOne = (TidyMessage) tidyMsgIt.next();
275 
276             assertEquals("Error code for message [" + count + "] is different from expected", expectedOne
277                 .getErrorCode(), tidyOne.getErrorCode());
278 
279             assertEquals(
280                 "Level for message [" + count + "] is different from expected",
281                 expectedOne.getLevel(),
282                 tidyOne.getLevel());
283 
284             assertEquals("Line for message ["
285                 + count
286                 + "] is different from expected. Expected position: ["
287                 + expectedOne.getLine()
288                 + ":"
289                 + expectedOne.getColumn()
290                 + "] , current ["
291                 + tidyOne.getLine()
292                 + ":"
293                 + tidyOne.getColumn()
294                 + "]", expectedOne.getLine(), tidyOne.getLine());
295 
296             assertEquals("Column for message ["
297                 + count
298                 + "] is different from expected. Expected position: ["
299                 + expectedOne.getLine()
300                 + ":"
301                 + expectedOne.getColumn()
302                 + "] , current ["
303                 + tidyOne.getLine()
304                 + ":"
305                 + tidyOne.getColumn()
306                 + "]", expectedOne.getColumn(), tidyOne.getColumn());
307 
308             // don't assert text in respect for i18n
309 
310             count++;
311         }
312 
313     }
314 
315     /***
316      * Basic test for DOM parser. Test is set up using [fileName.cfg] configuration if the file exists. Calls
317      * tidy.parseDOM and returns the Document to the caller.
318      * @param fileName input file name
319      * @return parsed Document
320      * @throws Exception any exception generated during the test
321      */
322     protected Document parseDomTest(String fileName) throws Exception
323     {
324         //creates a new Tidy
325         setUpTidy(fileName);
326 
327         // input file
328         URL inputURL = getClass().getClassLoader().getResource(fileName);
329         assertNotNull("Can't find input file [" + fileName + "]", inputURL);
330 
331         // out
332         OutputStream out = new ByteArrayOutputStream();
333 
334         Document doc = this.tidy.parseDOM(inputURL.openStream(), out);
335         this.tidyOut = out.toString();
336 
337         return doc;
338     }
339 
340     /***
341      * assert generated output and test file are equals.
342      * @param tidyOutput tidy output as string
343      * @param correctFile URL used to load the file for comparison
344      * @throws FileNotFoundException if test file is not found
345      * @throws IOException in reading file
346      */
347     protected void assertEquals(String tidyOutput, URL correctFile) throws FileNotFoundException, IOException
348     {
349         // assume the expected output has the same encoding tidy has in its configuration
350         String encodingName = tidy.getConfiguration().getOutCharEncodingName();
351 
352         diff(
353             new BufferedReader((new InputStreamReader(new ByteArrayInputStream(tidyOutput.getBytes()), encodingName))),
354             new BufferedReader(new InputStreamReader(new FileInputStream(correctFile.getPath()), encodingName)));
355     }
356 
357     /***
358      * Utility method: assert no warnings were reported in the last tidy run.
359      */
360     protected void assertNoWarnings()
361     {
362         int warningNum = this.tidy.getParseWarnings();
363         if (warningNum != 0)
364         {
365             fail("Test failed, [" + warningNum + "] false warnings were reported");
366         }
367     }
368 
369     /***
370      * Utility method: assert no errors were reported in the last tidy run.
371      */
372     protected void assertNoErrors()
373     {
374         int errorNum = this.tidy.getParseErrors();
375         if (errorNum != 0)
376         {
377             fail("Test failed, [" + errorNum + "] false errors were reported");
378         }
379     }
380 
381     /***
382      * Utility method: assert no warnings were reported in the last tidy run.
383      * @param expectedNumber expected number of warnings.
384      */
385     protected void assertWarnings(int expectedNumber)
386     {
387         int warningNum = this.tidy.getParseWarnings();
388         if (warningNum != expectedNumber)
389         {
390             fail("Test failed, [" + expectedNumber + "] warnings expected, [" + warningNum + "] were reported");
391         }
392     }
393 
394     /***
395      * Utility method: assert no errors were reported in the last tidy run.
396      * @param expectedNumber expected number of errors.
397      */
398     protected void assertErrors(int expectedNumber)
399     {
400         int errorNum = this.tidy.getParseErrors();
401         if (errorNum != expectedNumber)
402         {
403             fail("Test failed, [" + expectedNumber + "] errors expected, [" + errorNum + "] were reported");
404         }
405     }
406 
407     /***
408      * Utility method: asserts a given String can be found in the error log.
409      * @param expectedString expected String in error log.
410      */
411     protected void assertLogContains(String expectedString)
412     {
413         String logString = this.errorLog.toString();
414 
415         if (logString.indexOf(expectedString) == -1)
416         {
417             fail("Test failed, expected [" + expectedString + "] couldn't be found in error log.");
418         }
419     }
420 
421     /***
422      * Utility method: asserts a given String can't be found in the error log.
423      * @param expectedString expected String in error log.
424      */
425     protected void assertLogDoesntContains(String expectedString)
426     {
427         String logString = this.errorLog.toString();
428 
429         if (logString.indexOf(expectedString) != -1)
430         {
431             fail("Test failed, [" + expectedString + "] was found in error log.");
432         }
433     }
434 
435     /***
436      * set up the tidy instance.
437      * @param fileName input file name (needed to determine configuration file name)
438      * @throws IOException in reading configuration file
439      */
440     private void setUpTidy(String fileName) throws IOException
441     {
442         // config file names
443         String configFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".cfg";
444         String messagesFileName = fileName.substring(0, fileName.lastIndexOf("."));
445 
446         // input file
447         URL inputURL = getClass().getClassLoader().getResource(fileName);
448         assertNotNull("Can't find input file [" + fileName + "]", inputURL);
449 
450         // configuration file
451         URL configurationFile = getClass().getClassLoader().getResource(configFileName);
452 
453         // debug runing test info
454         if (log.isDebugEnabled())
455         {
456             StringBuffer message = new StringBuffer();
457             message.append("Testing [" + fileName + "]");
458             if (configurationFile != null)
459             {
460                 message.append(" using configuration file [" + configFileName + "]");
461             }
462             log.debug(message.toString());
463         }
464 
465         if (configurationFile == null)
466         {
467             configurationFile = getClass().getClassLoader().getResource("default.cfg");
468         }
469 
470         generateOutputUsingTidyC(inputURL.getFile(), configurationFile.getFile(), RUN_TIDY_EXECUTABLE.isDebugEnabled());
471 
472         // if configuration file exists load and set it
473         Properties testProperties = new Properties();
474         testProperties.load(configurationFile.openStream());
475         this.tidy.setConfigurationFromProps(testProperties);
476 
477         // set up error log
478         this.errorLog = new StringWriter();
479         this.tidy.setErrout(new PrintWriter(this.errorLog));
480 
481         this.messageListener = new TestMessageListener(messagesFileName);
482         this.tidy.setMessageListener(messageListener);
483     }
484 
485     /***
486      * Diff between two buffered readers. If comparison fails an AssertionFailedException is thrown with the line
487      * number, actual and expected output. Content is tested to be identical (same wrapping).
488      * @param tidyOutput reader for tidy generated output
489      * @param correctFile reader for test file
490      * @throws IOException in reading from readers
491      */
492     private static void diff(BufferedReader tidyOutput, BufferedReader correctFile) throws IOException
493     {
494         String tidyLine, testLine;
495         int i = 1;
496         do
497         {
498             tidyLine = tidyOutput.readLine();
499             testLine = correctFile.readLine();
500             i++;
501         }
502         while ((tidyLine != null) && (testLine != null) && (tidyLine.equals(testLine)));
503         tidyOutput.close();
504         correctFile.close();
505 
506         if ((tidyLine != null) || (testLine != null))
507         {
508             fail("Wrong output, file comparison failed at line ["
509                 + (i - 1)
510                 + "]:\n"
511                 + "[tidy]["
512                 + tidyLine
513                 + "]\n"
514                 + "[test]["
515                 + testLine
516                 + "]");
517         }
518         return;
519     }
520 
521     /***
522      * Run TIDY_EXECUTABLE to produce an output file. Used to generates output files using tidy c for comparison with
523      * jtidy. A file ".out" will be written in the same folder of the input file.
524      * @param inputFileName input file for tidy.
525      * @param configurationFileName configuration file name (default if there is no not test-specific file).
526      * @param runIt if true the output is generated using tidy, if false simply output the command line.
527      */
528     private void generateOutputUsingTidyC(String inputFileName, String configurationFileName, boolean runIt)
529     {
530 
531         String outputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(".")) + ".out";
532 
533         String strCmd = TIDY_EXECUTABLE
534             + " -config \""
535             + cleanUpFilePath(configurationFileName)
536             + "\" -o \""
537             + cleanUpFilePath(outputFileName)
538             + "\" \""
539             + cleanUpFilePath(inputFileName)
540             + "\"";
541 
542         log.debug("cmd line:\n***\n"
543             + strCmd
544             + "\nw/o output:\n"
545             + TIDY_EXECUTABLE
546             + " -config \""
547             + cleanUpFilePath(configurationFileName)
548             + "\" \""
549             + cleanUpFilePath(inputFileName)
550             + "\""
551             + "\n***");
552 
553         if (runIt)
554         {
555             log.debug("running " + TIDY_EXECUTABLE);
556             try
557             {
558                 Runtime.getRuntime().exec(strCmd);
559             }
560             catch (IOException e)
561             {
562                 log.warn("Error running [" + strCmd + "] cmd: " + e.getMessage());
563             }
564         }
565 
566     }
567 
568     /***
569      * Utility method to clean up file path returned by URLs.
570      * @param fileName file name as given by URL.getFile()
571      * @return String fileName
572      */
573     protected String cleanUpFilePath(String fileName)
574     {
575         if (fileName.length() > 3 && fileName.charAt(2) == ':')
576         {
577             // assuming something like ""/C:/program files/..."
578             return fileName.substring(1);
579         }
580         else if (fileName.startsWith("file://"))
581         {
582             return fileName.substring(7);
583         }
584 
585         return fileName;
586 
587     }
588 
589     /***
590      * A simple SAX Content Handler used to parse .msg files.
591      */
592     static class MsgXmlHandler extends DefaultHandler
593     {
594 
595         /***
596          * Parsed messages.
597          */
598         private List messages = new ArrayList();
599 
600         /***
601          * Error code for the current message.
602          */
603         private int code;
604 
605         /***
606          * Level for the current message.
607          */
608         private int level;
609 
610         /***
611          * Column for the current message.
612          */
613         private int column;
614 
615         /***
616          * Line for the current message.
617          */
618         private int line;
619 
620         /***
621          * Message the current message.
622          */
623         private StringBuffer textbuffer;
624 
625         /***
626          * Actual parsing position.
627          */
628         private int parsePosition = -100;
629 
630         /***
631          * actually parsing a detail tag.
632          */
633         private boolean intag;
634 
635         /***
636          * @see org.xml.sax.ContentHandler#startElement(String, String, String, org.xml.sax.Attributes)
637          */
638         public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
639         {
640             if ("message".equals(qName))
641             {
642                 parsePosition = 0;
643                 textbuffer = new StringBuffer();
644             }
645             else
646             {
647                 parsePosition++;
648                 intag = true;
649             }
650         }
651 
652         /***
653          * @see org.xml.sax.ContentHandler#endElement(String, String, String)
654          */
655         public void endElement(String uri, String localName, String qName) throws SAXException
656         {
657             if ("message".equals(qName))
658             {
659                 TidyMessage message = new TidyMessage(code, line, column, TidyMessage.Level.fromCode(level), textbuffer
660                     .toString());
661                 messages.add(message);
662             }
663             intag = false;
664         }
665 
666         /***
667          * @see org.xml.sax.ContentHandler#characters(char[], int, int)
668          */
669         public void characters(char[] ch, int start, int length) throws SAXException
670         {
671             if (!intag)
672             {
673                 return;
674             }
675 
676             switch (parsePosition)
677             {
678                 case 1 :
679                     this.code = Integer.parseInt(new String(ch, start, length));
680                     break;
681                 case 2 :
682                     this.level = Integer.parseInt(new String(ch, start, length));
683                     break;
684                 case 3 :
685                     this.line = Integer.parseInt(new String(ch, start, length));
686                     break;
687                 case 4 :
688                     this.column = Integer.parseInt(new String(ch, start, length));
689                     break;
690                 case 5 :
691                     textbuffer.append(new String(ch, start, length));
692                     break;
693                 default :
694                     break;
695             }
696         }
697 
698         /***
699          * Returns the list of parsed messages.
700          * @return List containing TidyMessage elements
701          */
702         public List getMessages()
703         {
704             return messages;
705         }
706     }
707 }