View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *     Vlad Skarzhevskyy <vlads at users.sourceforge.net> (JTidy servlet  development)
18   *
19   *  The contributing author(s) would like to thank all those who
20   *  helped with testing, bug fixes, and patience.  This wouldn't
21   *  have been possible without all of you.
22   *
23   *  COPYRIGHT NOTICE:
24   *
25   *  This software and documentation is provided "as is," and
26   *  the copyright holders and contributing author(s) make no
27   *  representations or warranties, express or implied, including
28   *  but not limited to, warranties of merchantability or fitness
29   *  for any particular purpose or that the use of the software or
30   *  documentation will not infringe any third party patents,
31   *  copyrights, trademarks or other rights.
32   *
33   *  The copyright holders and contributing author(s) will not be
34   *  liable for any direct, indirect, special or consequential damages
35   *  arising out of any use of the software or documentation, even if
36   *  advised of the possibility of such damage.
37   *
38   *  Permission is hereby granted to use, copy, modify, and distribute
39   *  this source code, or portions hereof, documentation and executables,
40   *  for any purpose, without fee, subject to the following restrictions:
41   *
42   *  1. The origin of this source code must not be misrepresented.
43   *  2. Altered versions must be plainly marked as such and must
44   *     not be misrepresented as being the original source.
45   *  3. This Copyright notice may not be removed or altered from any
46   *     source or altered source distribution.
47   *
48   *  The copyright holders and contributing author(s) specifically
49   *  permit, without fee, and encourage the use of this source code
50   *  as a component for supporting the Hypertext Markup Language in
51   *  commercial products. If you use this source code in a product,
52   *  acknowledgment is not required but would be appreciated.
53   *
54   */
55  package org.w3c.tidy.servlet;
56  
57  /*
58   * Created on 02.10.2004 by vlads
59   */
60  import java.io.ByteArrayOutputStream;
61  import java.io.IOException;
62  import java.io.InputStream;
63  import java.io.OutputStream;
64  import java.io.PrintWriter;
65  import java.util.Properties;
66  import java.util.StringTokenizer;
67  
68  import javax.servlet.http.Cookie;
69  import javax.servlet.http.HttpServletRequest;
70  import javax.servlet.http.HttpServletResponse;
71  import javax.servlet.http.HttpSession;
72  
73  import org.apache.commons.logging.Log;
74  import org.apache.commons.logging.LogFactory;
75  import org.w3c.tidy.Configuration;
76  import org.w3c.tidy.Tidy;
77  import org.w3c.tidy.TidyMessage;
78  import org.w3c.tidy.servlet.jsp.tagext.ValidationImageTag;
79  import org.w3c.tidy.servlet.properties.JTidyServletProperties;
80  
81  
82  /***
83   * Common class used by Filter and Tag to process responce.
84   * @author Vlad Skarzhevskyy <a href="mailto:skarzhevskyy@gmail.com">skarzhevskyy@gmail.com </a>
85   * @version $Revision: 1.6 $ ($Author: fgiust $)
86   */
87  public class TidyProcessor
88  {
89  
90      /***
91       * The request with which this Processor is associated.
92       */
93      HttpSession httpSession;
94  
95      HttpServletRequest request;
96  
97      HttpServletResponse response;
98  
99      /***
100      * JTidy Parser configutation string Examples of config string: indent: auto; indent-spaces: 2
101      */
102     private String config;
103 
104     /***
105      * validateOnly only do not change output.
106      */
107     private boolean validateOnly;
108 
109     /***
110      * Performs validation of html processed by &lt;jtidy:tidy&gt; jsp tag By default this is not done. Only Usefull for
111      * testing JTidy This will create second requestID to store the data
112      */
113     private boolean doubleValidation;
114 
115     private boolean commentsSubst;
116 
117     /***
118      * Logger.
119      */
120     private Log log = LogFactory.getLog(TidyProcessor.class);
121 
122     /***
123      * Initialize Processor.
124      * @param request HttpServletRequest
125      * @param response HttpServletResponse
126      */
127     public TidyProcessor(
128         HttpSession httpSession,
129         HttpServletRequest httpServletRequest,
130         HttpServletResponse httpServletResponse)
131     {
132         this.httpSession = httpSession;
133         this.request = httpServletRequest;
134         this.response = httpServletResponse;
135     }
136 
137     /*
138      * Parser for JTidy configutation. Examples of config string: indent: auto; indent-spaces: 2 @param JTidy
139      * Configuration to change
140      */
141     private void parsConfig(Configuration configuration)
142     {
143         if (config == null)
144         {
145             return;
146         }
147         Properties properties = new Properties();
148         // assume Java 1.3 with no regex
149         StringTokenizer st = new StringTokenizer(config, ";");
150         while (st.hasMoreTokens())
151         {
152             String nv = st.nextToken();
153             int split = nv.indexOf(':');
154             if (split > 0)
155             {
156                 String n = nv.substring(0, split).trim();
157                 String v = nv.substring(split + 1).trim();
158                 if (Configuration.isKnownOption(n))
159                 {
160                     properties.put(n, v);
161                     log.debug("add option " + n + "=" + v);
162                 }
163                 else
164                 {
165                     log.warn("TidyTag unknown option " + n);
166                 }
167             }
168         }
169         configuration.addProps(properties);
170         configuration.adjust();
171     }
172 
173     public boolean parse(InputStream in, OutputStream out, String html)
174     {
175         if (this.request.getAttribute(Consts.ATTRIBUTE_IGNORE) != null)
176         {
177             log.debug("IGNORE");
178             return false;
179         }
180 
181         RepositoryFactory factory = JTidyServletProperties.getInstance().getRepositoryFactoryInstance();
182 
183         Object requestID = factory.getResponseID(this.httpSession, this.request, this.response, false);
184         if (requestID == null)
185         {
186             log.debug("IGNORE requestID == null");
187             return false;
188         }
189 
190         boolean secondPass = false;
191         // Avoid Double processing by Tag and Filter
192         if (this.request.getAttribute(Consts.ATTRIBUTE_PROCESSED) != null)
193         {
194             if (!doubleValidation)
195             {
196                 log.debug("IGNORE !doubleValidation");
197                 return false;
198             }
199             requestID = factory.getResponseID(this.httpSession, this.request, this.response, true);
200             secondPass = true;
201         }
202 
203         if (!secondPass)
204         {
205             log.debug("addCookie");
206             this.response.addCookie(new Cookie(Consts.ATTRIBUTE_REQUEST_ID, requestID.toString()));
207         }
208 
209         boolean rc = parse(in, out, html, requestID, factory);
210 
211         if (!secondPass)
212         {
213             // this.request.setAttribute(Consts.ATTRIBUTE_PROCESSED, shortMessage);
214             this.request.setAttribute(Consts.ATTRIBUTE_PROCESSED, requestID);
215         }
216 
217         if (rc && (!this.validateOnly) && (this.request.getAttribute(Consts.ATTRIBUTE_PASS) != null))
218         {
219             rc = false;
220         }
221 
222         return rc;
223     }
224 
225     public boolean parse(InputStream in, OutputStream out, String html, Object requestID, RepositoryFactory factory)
226     {
227         long start = System.currentTimeMillis();
228 
229         Tidy tidy = new Tidy();
230         parsConfig(tidy.getConfiguration());
231         tidy.setSmartIndent(true);
232         tidy.setQuiet(true);
233 
234         ByteArrayOutputStream mesageBuffer = new ByteArrayOutputStream();
235         PrintWriter pw = new PrintWriter(mesageBuffer);
236         tidy.setErrout(pw);
237 
238         boolean useOut = false;
239 
240         ResponseRecord result = factory.createRecord(requestID, this.httpSession, this.request, this.response);
241         result.setRequestID(requestID);
242         tidy.setMessageListener(result);
243 
244         boolean fatalError = false;
245 
246         ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
247 
248         try
249         {
250             log.debug("processing request " + requestID + "...");
251             tidy.parse(in, outBuffer);
252             useOut = (result.getParseErrors() == 0);
253             if (commentsSubst)
254             {
255                 doCommentsSubst(outBuffer, requestID);
256             }
257             if (out != null)
258             {
259                 outBuffer.writeTo(out);
260             }
261         }
262         catch (Throwable e)
263         {
264             log.error("JTidy parsing error", e);
265             result.messageReceived(new TidyMessage(0, 0, 0, TidyMessage.Level.ERROR, "JTidy parsing error"
266                 + e.getMessage()));
267             fatalError = true;
268         }
269 
270         // result.setParseErrors(tidy.getParseErrors());
271         // result.setParseWarnings(tidy.getParseWarnings());
272 
273         result.setHtmlInput(html);
274 
275         if ((result.getParseErrors() > 0) || fatalError)
276         {
277             result.setHtmlOutput(html);
278         }
279         else
280         {
281             result.setHtmlOutput(outBuffer.toString());
282         }
283 
284         if (!fatalError)
285         {
286             // @todo can't flush in tag body
287             // pw.flush();
288             // result.setReport(mesageBuffer.toString());
289         }
290 
291         long time = System.currentTimeMillis() - start;
292         result.setParsTime(time);
293         if (log.isDebugEnabled())
294         {
295             log.debug("processed in " + time + " millis");
296         }
297 
298         ResponseRecordRepository repository = factory.getRepositoryInstance(this.httpSession);
299         repository.addRecord(result);
300 
301         String shortMessage;
302         if ((result.getParseErrors() != 0) || (result.getParseWarnings() != 0))
303         {
304             if (result.getParseErrors() == 0)
305             {
306                 shortMessage = "found " + result.getParseWarnings() + " warnings in generated HTML";
307             }
308             else
309             {
310                 shortMessage = "found "
311                     + result.getParseErrors()
312                     + " errors and "
313                     + result.getParseWarnings()
314                     + " warnings in generated HTML";
315             }
316         }
317         else
318         {
319             shortMessage = "no problems found";
320         }
321 
322         log.info(shortMessage + " request " + requestID);
323 
324         return (useOut && (out != null));
325     }
326 
327     private void doCommentsSubst(ByteArrayOutputStream outBuffer, Object requestID)
328     {
329         log.debug("doCommentsSubst");
330         // Prohibit caching of application pages.
331         if (response != null)
332         {
333             response.setHeader("Pragma", "No-cache");
334             response.setHeader("Cache-Control", "no-cache");
335             response.setDateHeader("Expires", -1);
336         }
337 
338         String html = outBuffer.toString();
339         html = replaceAll(html, "<!--jtidy:requestID-->", requestID.toString());
340         String aLink = ValidationImageTag.getImageHTML(requestID.toString(), null, null, request);
341         html = replaceAll(html, "<!--jtidy:validationImage-->", aLink);
342         outBuffer.reset();
343         try
344         {
345             // to-do charsetName
346             outBuffer.write(html.getBytes());
347         }
348         catch (IOException e)
349         {
350             log.error("Internal error", e);
351         }
352     }
353 
354     /***
355      * @param config The config to set.
356      */
357     public void setConfig(String config)
358     {
359         this.config = config;
360     }
361 
362     /***
363      * @return Returns the doubleValidation.
364      */
365     public boolean isDoubleValidation()
366     {
367         return doubleValidation;
368     }
369 
370     /***
371      * @param doubleValidation The doubleValidation to set.
372      */
373     public void setDoubleValidation(boolean doubleValidation)
374     {
375         this.doubleValidation = doubleValidation;
376     }
377 
378     /***
379      * @param validateOnly The validateOnly to set.
380      */
381     public void setValidateOnly(boolean validateOnly)
382     {
383         this.validateOnly = validateOnly;
384     }
385 
386     /***
387      * @param commentsSubst The commentsSubst to set.
388      */
389     public void setCommentsSubst(boolean commentsSubst)
390     {
391         this.commentsSubst = commentsSubst;
392     }
393 
394     /***
395      * jre 1.3 compatible replaceAll.
396      * @param str text to search and replace in
397      * @param replace the String to search for
398      * @param replacement the String to replace with
399      * @return the text with any replacements processed
400      */
401     public String replaceAll(String str, String replace, String replacement)
402     {
403         StringBuffer sb = new StringBuffer(str);
404         int firstOccurrence = str.indexOf(replace);
405 
406         while (firstOccurrence != -1)
407         {
408             sb.replace(firstOccurrence, firstOccurrence + replace.length(), replacement);
409             firstOccurrence = sb.toString().indexOf(replace);
410         }
411 
412         return sb.toString();
413     }
414 
415 }