1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import junit.framework.TestCase;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
85  
86  
87  /**
88   * @author fgiust
89   * @version $Revision: 808 $ ($Author: fgiust $)
90   */
91  public class TidyTestCase extends TestCase
92  {
93  
94      /**
95       * Tidy executable name, if you want to produce output files for comparison.
96       */
97      private static final String TIDY_EXECUTABLE = "tidy.exe";
98  
99      /**
100      * Logger used to enable/disable output file generation using tidy c executable. Setting this logger to
101      * <code>debug</code> in your log4j configuration file will cause the TIDY_EXECUTABLE to be run against the actual
102      * test file. If set to false the command line used to manually run tidy will appear in the log.
103      */
104     private static final Log RUN_TIDY_EXECUTABLE = LogFactory.getLog("runtidy");
105 
106     /**
107      * Tidy test instance.
108      */
109     protected Tidy tidy;
110 
111     /**
112      * message listener.
113      */
114     protected TestMessageListener messageListener;
115 
116     /**
117      * Error out.
118      */
119     protected StringWriter errorLog;
120 
121     /**
122      * Tidy output.
123      */
124     protected String tidyOut;
125 
126     /**
127      * logger.
128      */
129     protected Log log = LogFactory.getLog(getClass());
130 
131     /**
132      * write directly to out. Useful for debugging (but it will make the test fail!).
133      */
134     private boolean writeToOut;
135 
136     /**
137      * Instantiate a new Test case.
138      * @param name test name
139      */
140     public TidyTestCase(String name)
141     {
142         super(name);
143     }
144 
145     /**
146      * @see junit.framework.TestCase#setUp()
147      */
148     protected void setUp() throws Exception
149     {
150         super.setUp();
151 
152         // creates a new Tidy
153         this.tidy = new Tidy();
154     }
155 
156     /**
157      * @see junit.framework.TestCase#tearDown()
158      */
159     protected void tearDown() throws Exception
160     {
161         this.tidy = null;
162         this.errorLog = null;
163         this.tidyOut = null;
164 
165         super.tearDown();
166     }
167 
168     /**
169      * Executes a tidy test. This method simply requires the input file name. If a file with the same name but with a
170      * ".cfg" extension is found is used as configuration file for the test, otherwise the default config will be used.
171      * If a file with the same name, but with the ".out" extension is found, tidy will the result with the content of
172      * such file.
173      * @param fileName input file name
174      * @throws Exception any exception generated during the test
175      */
176     protected void executeTidyTest(String fileName) throws Exception
177     {
178 
179         // set up Tidy using supplied configuration
180         setUpTidy(fileName);
181 
182         // input file
183         URL inputURL = getClass().getClassLoader().getResource(fileName);
184         assertNotNull("Can't find input file [" + fileName + "]", inputURL);
185 
186         OutputStream out;
187         // out
188         if (!writeToOut)
189         {
190             out = new ByteArrayOutputStream();
191         }
192         else
193         {
194             out = System.out;
195         }
196 
197         // go!
198         this.tidy.parse(inputURL.openStream(), out);
199 
200         if (log.isDebugEnabled())
201         {
202             log.debug("out:\n---- out ----\n" + out + "\n---- out ----");
203             log.debug("log:\n---- log ----\n" + this.errorLog + "\n---- log ----");
204         }
205 
206         // existing file for comparison
207         String outFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".out";
208         URL outFile = getClass().getClassLoader().getResource(outFileName);
209 
210         this.tidyOut = out.toString();
211 
212         if (outFile != null)
213         {
214             log.debug("Comparing file using [" + outFileName + "]");
215             assertEquals(this.tidyOut, outFile);
216         }
217 
218         // check messages
219         String messagesFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".msg";
220         URL messagesFile = getClass().getClassLoader().getResource(messagesFileName);
221 
222         // save messages
223         if (messagesFile == null)
224         {
225             if (log.isDebugEnabled())
226             {
227                 log.debug("Messages file doesn't exists, generating [" + messagesFileName + "] for reference");
228             }
229             FileWriter fw = new FileWriter(inputURL.getFile().substring(0, inputURL.getFile().lastIndexOf("."))
230                 + ".msg");
231             fw.write(this.messageListener.messagesToXml());
232             fw.close();
233         }
234         else
235         {
236             // compare result to expected messages
237             if (log.isDebugEnabled())
238             {
239                 log.debug("Comparing messages using [" + messagesFileName + "]");
240             }
241             compareMsgXml(messagesFile);
242         }
243     }
244 
245     /**
246      * Parse an existing msg file and assert that content is identical to current output.
247      * @param messagesFile URL to mesage file
248      * @throws Exception any exception generated during the test
249      */
250     protected void compareMsgXml(URL messagesFile) throws Exception
251     {
252 
253         // first parse existing file
254         // avoid using DOM since if will need forking junit execution in maven (too slow)
255         SAXParserFactory factory = SAXParserFactory.newInstance();
256         SAXParser saxParser = factory.newSAXParser();
257 
258         MsgXmlHandler handler = new MsgXmlHandler();
259         saxParser.parse(new InputSource(messagesFile.openStream()), handler);
260         List expectedMsgs = handler.getMessages();
261 
262         List tidyMsgs = this.messageListener.getReceived();
263 
264         // assert size
265         if (expectedMsgs.size() != tidyMsgs.size())
266         {
267             StringBuffer messagesAsString = new StringBuffer();
268 
269             for (Iterator iter = tidyMsgs.iterator(); iter.hasNext();)
270             {
271                 TidyMessage message = (TidyMessage) iter.next();
272                 messagesAsString.append("\n");
273                 messagesAsString.append(message.getMessage());
274             }
275 
276             fail("Expected "
277                 + expectedMsgs.size()
278                 + " messages but got "
279                 + tidyMsgs.size()
280                 + ". Messages:"
281                 + messagesAsString.toString());
282         }
283 
284         // compare messages
285         Iterator expectedMsgIt = expectedMsgs.iterator();
286         Iterator tidyMsgIt = tidyMsgs.iterator();
287         int count = 0;
288         while (tidyMsgIt.hasNext())
289         {
290             TidyMessage expectedOne = (TidyMessage) expectedMsgIt.next();
291             TidyMessage tidyOne = (TidyMessage) tidyMsgIt.next();
292 
293             assertEquals("Error code for message [" + count + "] is different from expected", expectedOne
294                 .getErrorCode(), tidyOne.getErrorCode());
295 
296             assertEquals(
297                 "Level for message [" + count + "] is different from expected",
298                 expectedOne.getLevel(),
299                 tidyOne.getLevel());
300 
301             assertEquals("Line for message ["
302                 + count
303                 + "] is different from expected. Expected position: ["
304                 + expectedOne.getLine()
305                 + ":"
306                 + expectedOne.getColumn()
307                 + "] , current ["
308                 + tidyOne.getLine()
309                 + ":"
310                 + tidyOne.getColumn()
311                 + "]", expectedOne.getLine(), tidyOne.getLine());
312 
313             assertEquals("Column for message ["
314                 + count
315                 + "] is different from expected. Expected position: ["
316                 + expectedOne.getLine()
317                 + ":"
318                 + expectedOne.getColumn()
319                 + "] , current ["
320                 + tidyOne.getLine()
321                 + ":"
322                 + tidyOne.getColumn()
323                 + "]", expectedOne.getColumn(), tidyOne.getColumn());
324 
325             // don't assert text in respect for i18n
326 
327             count++;
328         }
329 
330     }
331 
332     /**
333      * Basic test for DOM parser. Test is set up using [fileName.cfg] configuration if the file exists. Calls
334      * tidy.parseDOM and returns the Document to the caller.
335      * @param fileName input file name
336      * @return parsed Document
337      * @throws Exception any exception generated during the test
338      */
339     protected Document parseDomTest(String fileName) throws Exception
340     {
341         // creates a new Tidy
342         setUpTidy(fileName);
343 
344         // input file
345         URL inputURL = getClass().getClassLoader().getResource(fileName);
346         assertNotNull("Can't find input file [" + fileName + "]", inputURL);
347 
348         // out
349         OutputStream out = new ByteArrayOutputStream();
350 
351         Document doc = this.tidy.parseDOM(inputURL.openStream(), out);
352         this.tidyOut = out.toString();
353 
354         return doc;
355     }
356 
357     /**
358      * assert generated output and test file are equals.
359      * @param tidyOutput tidy output as string
360      * @param correctFile URL used to load the file for comparison
361      * @throws FileNotFoundException if test file is not found
362      * @throws IOException in reading file
363      */
364     protected void assertEquals(String tidyOutput, URL correctFile) throws FileNotFoundException, IOException
365     {
366         // assume the expected output has the same encoding tidy has in its configuration
367         String encodingName = tidy.getConfiguration().getOutCharEncodingName();
368 
369         diff(
370             new BufferedReader((new InputStreamReader(new ByteArrayInputStream(tidyOutput.getBytes()), encodingName))),
371             new BufferedReader(new InputStreamReader(new FileInputStream(correctFile.getPath()), encodingName)));
372     }
373 
374     /**
375      * Utility method: assert no warnings were reported in the last tidy run.
376      */
377     protected void assertNoWarnings()
378     {
379         int warningNum = this.tidy.getParseWarnings();
380         if (warningNum != 0)
381         {
382             fail("Test failed, [" + warningNum + "] false warnings were reported");
383         }
384     }
385 
386     /**
387      * Utility method: assert no errors were reported in the last tidy run.
388      */
389     protected void assertNoErrors()
390     {
391         int errorNum = this.tidy.getParseErrors();
392         if (errorNum != 0)
393         {
394             fail("Test failed, [" + errorNum + "] false errors were reported");
395         }
396     }
397 
398     /**
399      * Utility method: assert no warnings were reported in the last tidy run.
400      * @param expectedNumber expected number of warnings.
401      */
402     protected void assertWarnings(int expectedNumber)
403     {
404         int warningNum = this.tidy.getParseWarnings();
405         if (warningNum != expectedNumber)
406         {
407             fail("Test failed, [" + expectedNumber + "] warnings expected, [" + warningNum + "] were reported");
408         }
409     }
410 
411     /**
412      * Utility method: assert no errors were reported in the last tidy run.
413      * @param expectedNumber expected number of errors.
414      */
415     protected void assertErrors(int expectedNumber)
416     {
417         int errorNum = this.tidy.getParseErrors();
418         if (errorNum != expectedNumber)
419         {
420             fail("Test failed, [" + expectedNumber + "] errors expected, [" + errorNum + "] were reported");
421         }
422     }
423 
424     /**
425      * Utility method: asserts a given String can be found in the error log.
426      * @param expectedString expected String in error log.
427      */
428     protected void assertLogContains(String expectedString)
429     {
430         String logString = this.errorLog.toString();
431 
432         if (logString.indexOf(expectedString) == -1)
433         {
434             fail("Test failed, expected [" + expectedString + "] couldn't be found in error log.");
435         }
436     }
437 
438     /**
439      * Utility method: asserts a given String can't be found in the error log.
440      * @param expectedString expected String in error log.
441      */
442     protected void assertLogDoesntContains(String expectedString)
443     {
444         String logString = this.errorLog.toString();
445 
446         if (logString.indexOf(expectedString) != -1)
447         {
448             fail("Test failed, [" + expectedString + "] was found in error log.");
449         }
450     }
451 
452     /**
453      * set up the tidy instance.
454      * @param fileName input file name (needed to determine configuration file name)
455      * @throws IOException in reading configuration file
456      */
457     private void setUpTidy(String fileName) throws IOException
458     {
459         // config file names
460         String configFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".cfg";
461         String messagesFileName = fileName.substring(0, fileName.lastIndexOf("."));
462 
463         // input file
464         URL inputURL = getClass().getClassLoader().getResource(fileName);
465         assertNotNull("Can't find input file [" + fileName + "]", inputURL);
466 
467         // configuration file
468         URL configurationFile = getClass().getClassLoader().getResource(configFileName);
469 
470         // debug runing test info
471         if (log.isDebugEnabled())
472         {
473             StringBuffer message = new StringBuffer();
474             message.append("Testing [" + fileName + "]");
475             if (configurationFile != null)
476             {
477                 message.append(" using configuration file [" + configFileName + "]");
478             }
479             log.debug(message.toString());
480         }
481 
482         if (configurationFile == null)
483         {
484             configurationFile = getClass().getClassLoader().getResource("default.cfg");
485         }
486 
487         generateOutputUsingTidyC(inputURL.getFile(), configurationFile.getFile(), RUN_TIDY_EXECUTABLE.isDebugEnabled());
488 
489         // if configuration file exists load and set it
490         Properties testProperties = new Properties();
491         testProperties.load(configurationFile.openStream());
492         this.tidy.setConfigurationFromProps(testProperties);
493 
494         // set up error log
495         this.errorLog = new StringWriter();
496         this.tidy.setErrout(new PrintWriter(this.errorLog));
497 
498         this.messageListener = new TestMessageListener(messagesFileName);
499         this.tidy.setMessageListener(messageListener);
500     }
501 
502     /**
503      * Diff between two buffered readers. If comparison fails an AssertionFailedException is thrown with the line
504      * number, actual and expected output. Content is tested to be identical (same wrapping).
505      * @param tidyOutput reader for tidy generated output
506      * @param correctFile reader for test file
507      * @throws IOException in reading from readers
508      */
509     private static void diff(BufferedReader tidyOutput, BufferedReader correctFile) throws IOException
510     {
511         String tidyLine, testLine;
512         int i = 1;
513         do
514         {
515             tidyLine = tidyOutput.readLine();
516             testLine = correctFile.readLine();
517             i++;
518         }
519         while ((tidyLine != null) && (testLine != null) && (tidyLine.equals(testLine)));
520         tidyOutput.close();
521         correctFile.close();
522 
523         if ((tidyLine != null) || (testLine != null))
524         {
525             assertEquals("Wrong output, file comparison failed at line [" + (i - 1) + "]", testLine, tidyLine);
526         }
527         return;
528     }
529 
530     /**
531      * Run TIDY_EXECUTABLE to produce an output file. Used to generates output files using tidy c for comparison with
532      * jtidy. A file ".out" will be written in the same folder of the input file.
533      * @param inputFileName input file for tidy.
534      * @param configurationFileName configuration file name (default if there is no not test-specific file).
535      * @param runIt if true the output is generated using tidy, if false simply output the command line.
536      */
537     private void generateOutputUsingTidyC(String inputFileName, String configurationFileName, boolean runIt)
538     {
539 
540         String outputFileName = inputFileName.substring(0, inputFileName.lastIndexOf(".")) + ".out";
541 
542         String strCmd = TIDY_EXECUTABLE
543             + " -config \""
544             + cleanUpFilePath(configurationFileName)
545             + "\" -o \""
546             + cleanUpFilePath(outputFileName)
547             + "\" \""
548             + cleanUpFilePath(inputFileName)
549             + "\"";
550 
551         log.debug("cmd line:\n***\n"
552             + strCmd
553             + "\nw/o output:\n"
554             + TIDY_EXECUTABLE
555             + " -config \""
556             + cleanUpFilePath(configurationFileName)
557             + "\" \""
558             + cleanUpFilePath(inputFileName)
559             + "\""
560             + "\n***");
561 
562         if (runIt)
563         {
564             log.debug("running " + TIDY_EXECUTABLE);
565             try
566             {
567                 Runtime.getRuntime().exec(strCmd);
568             }
569             catch (IOException e)
570             {
571                 log.warn("Error running [" + strCmd + "] cmd: " + e.getMessage());
572             }
573         }
574 
575     }
576 
577     /**
578      * Utility method to clean up file path returned by URLs.
579      * @param fileName file name as given by URL.getFile()
580      * @return String fileName
581      */
582     protected String cleanUpFilePath(String fileName)
583     {
584         if (fileName.length() > 3 && fileName.charAt(2) == ':')
585         {
586             // assuming something like ""/C:/program files/..."
587             return fileName.substring(1);
588         }
589         else if (fileName.startsWith("file://"))
590         {
591             return fileName.substring(7);
592         }
593 
594         return fileName;
595 
596     }
597 
598     /**
599      * A simple SAX Content Handler used to parse .msg files.
600      */
601     static class MsgXmlHandler extends DefaultHandler
602     {
603 
604         /**
605          * Parsed messages.
606          */
607         private List messages = new ArrayList();
608 
609         /**
610          * Error code for the current message.
611          */
612         private int code;
613 
614         /**
615          * Level for the current message.
616          */
617         private int level;
618 
619         /**
620          * Column for the current message.
621          */
622         private int column;
623 
624         /**
625          * Line for the current message.
626          */
627         private int line;
628 
629         /**
630          * Message the current message.
631          */
632         private StringBuffer textbuffer;
633 
634         /**
635          * Actual parsing position.
636          */
637         private int parsePosition = -100;
638 
639         /**
640          * actually parsing a detail tag.
641          */
642         private boolean intag;
643 
644         /**
645          * @see org.xml.sax.ContentHandler#startElement(String, String, String, org.xml.sax.Attributes)
646          */
647         public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException
648         {
649             if ("message".equals(qName))
650             {
651                 parsePosition = 0;
652                 textbuffer = new StringBuffer();
653             }
654             else
655             {
656                 parsePosition++;
657                 intag = true;
658             }
659         }
660 
661         /**
662          * @see org.xml.sax.ContentHandler#endElement(String, String, String)
663          */
664         public void endElement(String uri, String localName, String qName) throws SAXException
665         {
666             if ("message".equals(qName))
667             {
668                 TidyMessage message = new TidyMessage(code, line, column, TidyMessage.Level.fromCode(level), textbuffer
669                     .toString());
670                 messages.add(message);
671             }
672             intag = false;
673         }
674 
675         /**
676          * @see org.xml.sax.ContentHandler#characters(char[], int, int)
677          */
678         public void characters(char[] ch, int start, int length) throws SAXException
679         {
680             if (!intag)
681             {
682                 return;
683             }
684 
685             switch (parsePosition)
686             {
687                 case 1 :
688                     this.code = Integer.parseInt(new String(ch, start, length));
689                     break;
690                 case 2 :
691                     this.level = Integer.parseInt(new String(ch, start, length));
692                     break;
693                 case 3 :
694                     this.line = Integer.parseInt(new String(ch, start, length));
695                     break;
696                 case 4 :
697                     this.column = Integer.parseInt(new String(ch, start, length));
698                     break;
699                 case 5 :
700                     textbuffer.append(new String(ch, start, length));
701                     break;
702                 default :
703                     break;
704             }
705         }
706 
707         /**
708          * Returns the list of parsed messages.
709          * @return List containing TidyMessage elements
710          */
711         public List getMessages()
712         {
713             return messages;
714         }
715     }
716 }