View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *     Vlad Skarzhevskyy <vlads at users.sourceforge.net> (JTidy servlet  development)
18   *
19   *  The contributing author(s) would like to thank all those who
20   *  helped with testing, bug fixes, and patience.  This wouldn't
21   *  have been possible without all of you.
22   *
23   *  COPYRIGHT NOTICE:
24   *
25   *  This software and documentation is provided "as is," and
26   *  the copyright holders and contributing author(s) make no
27   *  representations or warranties, express or implied, including
28   *  but not limited to, warranties of merchantability or fitness
29   *  for any particular purpose or that the use of the software or
30   *  documentation will not infringe any third party patents,
31   *  copyrights, trademarks or other rights.
32   *
33   *  The copyright holders and contributing author(s) will not be
34   *  liable for any direct, indirect, special or consequential damages
35   *  arising out of any use of the software or documentation, even if
36   *  advised of the possibility of such damage.
37   *
38   *  Permission is hereby granted to use, copy, modify, and distribute
39   *  this source code, or portions hereof, documentation and executables,
40   *  for any purpose, without fee, subject to the following restrictions:
41   *
42   *  1. The origin of this source code must not be misrepresented.
43   *  2. Altered versions must be plainly marked as such and must
44   *     not be misrepresented as being the original source.
45   *  3. This Copyright notice may not be removed or altered from any
46   *     source or altered source distribution.
47   *
48   *  The copyright holders and contributing author(s) specifically
49   *  permit, without fee, and encourage the use of this source code
50   *  as a component for supporting the Hypertext Markup Language in
51   *  commercial products. If you use this source code in a product,
52   *  acknowledgment is not required but would be appreciated.
53   *
54   */
55  package org.w3c.tidy.servlet.util;
56  
57  import java.lang.reflect.Method;
58  import java.net.URLEncoder;
59  import java.util.Hashtable;
60  import java.util.Iterator;
61  import java.util.Map;
62  
63  
/**
 * Converts a String to HTML by converting all special characters to HTML-entities, and builds URL query strings
 * whose parameter names and values are URL-encoded.
 * <p>
 * The class is written against the J2SE 1.3 API: the two-argument <code>URLEncoder.encode</code> introduced in
 * J2SE 1.4 is only reached through reflection, and no generics or other later language features are used.
 * </p>
 * @author Vlad Skarzhevskyy <a href="mailto:skarzhevskyy@gmail.com">skarzhevskyy@gmail.com </a>
 * @version $Revision: 1.5 $ ($Author: vlads $)
 */

public class HTMLEncode
{

    /**
     * Prefix shared by every error raised when the reflective call to the 1.4 encoder fails.
     */
    private static final String REFLECTION_ERROR = "Error invoking 1.4 URLEncoder.encode with reflection: ";

    /**
     * j2se 1.4 encode method, used by reflection if available; <code>null</code> when the lookup failed.
     */
    private static final Method encodeMethod14 = lookupEncodeMethod14();

    /**
     * Character/entity pairs: every even index holds the character to replace (as a one character String) and the
     * following odd index holds its replacement.
     */
    private static final String[] ENTITIES = {
        ">",
        "&gt;",
        "<",
        "&lt;",
        "&",
        "&amp;",
        "\"",
        "&quot;",
        "'",
        "&#039;",
        // &#092; is the backslash. The key used to be the two character string "//", which could never match the
        // single character lookups done by encode(), so this entry was dead.
        "\\",
        "&#092;",
        "\u00a9",
        "&copy;",
        "\u00ae",
        "&reg;"};

    /**
     * Lookup table built from {@link #ENTITIES}: one character String to its entity.
     */
    private static Hashtable entityTableEncode;

    static
    {
        // Built during class initialisation (after ENTITIES has been assigned) so encode() never needs an
        // unsynchronized "is it built yet" check.
        buildEntityTables();
    }

    /**
     * Utility class, don't instantiate.
     */
    private HTMLEncode()
    {
        // unused
    }

    /**
     * Looks up <code>URLEncoder.encode(String, String)</code>.
     * URLEncoder.encode(String) has been deprecated in J2SE 1.4; the two-argument form is used instead when the
     * running platform provides it.
     * @return the method, or <code>null</code> if it cannot be obtained
     */
    private static Method lookupEncodeMethod14()
    {
        try
        {
            return URLEncoder.class.getMethod("encode", new Class[]{String.class, String.class});
        }
        catch (Exception ignored)
        {
            // Not available (or not accessible): callers fall back to the single-argument encoder.
            return null;
        }
    }

    /**
     * (Re)builds the entity lookup table from {@link #ENTITIES}. When a character is listed more than once the
     * first entry wins.
     * <p>
     * The table is filled through a local variable and only then assigned to the shared field, so a caller of
     * {@link #encode(String, String)} running at the same time never sees a partially filled table.
     * </p>
     */
    protected static synchronized void buildEntityTables()
    {
        Hashtable table = new Hashtable(ENTITIES.length);

        for (int i = 0; i + 1 < ENTITIES.length; i += 2)
        {
            if (!table.containsKey(ENTITIES[i]))
            {
                table.put(ENTITIES[i], ENTITIES[i + 1]);
            }
        }

        entityTableEncode = table;
    }

    /**
     * Converts a String to HTML by converting all special characters to HTML-entities. Line feeds are kept as
     * line feeds.
     * @param s text to encode, may be <code>null</code>
     * @return encoded text, an empty String if <code>s</code> is <code>null</code>
     */
    public final static String encode(String s)
    {
        return encode(s, "\n");
    }

    /**
     * Converts a String to HTML by converting all special characters to HTML-entities.
     * <p>
     * Only <code>?</code>, <code>@</code>, the letters A-Z and a-z and the space character are copied unchanged.
     * A line feed is replaced by <code>cr</code>. Any other character is replaced by its entry in the entity table
     * or, when it has none, by a decimal numeric character reference. A valid surrogate pair is written as one
     * reference to the supplementary code point it represents; an unpaired surrogate is written as is.
     * </p>
     * @param s text to encode, may be <code>null</code>
     * @param cr text written in place of every line feed character
     * @return encoded text, an empty String if <code>s</code> is <code>null</code>
     */
    public final static String encode(String s, String cr)
    {
        if (s == null)
        {
            return "";
        }
        final int length = s.length();
        StringBuffer sb = new StringBuffer(length * 2);
        for (int i = 0; i < length; ++i)
        {
            char ch = s.charAt(i);
            if ((ch >= '?' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch == ' '))
            {
                sb.append(ch);
            }
            else if (ch == '\n')
            {
                sb.append(cr);
            }
            else
            {
                String chEnc = encodeSingleChar(String.valueOf(ch));
                if (chEnc != null)
                {
                    sb.append(chEnc);
                }
                else
                {
                    int codePoint = ch;
                    if (isHighSurrogate(ch) && (i + 1 < length) && isLowSurrogate(s.charAt(i + 1)))
                    {
                        // Two references to the surrogate halves are not valid HTML: merge the pair and skip
                        // the low half.
                        codePoint = 0x10000 + ((ch - 0xD800) << 10) + (s.charAt(i + 1) - 0xDC00);
                        ++i;
                    }
                    sb.append("&#");
                    sb.append(codePoint);
                    sb.append(';');
                }
            }
        }
        return sb.toString();
    }

    /**
     * Tells whether a char is the first half of a surrogate pair (U+D800 to U+DBFF).
     */
    private static boolean isHighSurrogate(char ch)
    {
        return ch >= 0xD800 && ch <= 0xDBFF;
    }

    /**
     * Tells whether a char is the second half of a surrogate pair (U+DC00 to U+DFFF).
     */
    private static boolean isLowSurrogate(char ch)
    {
        return ch >= 0xDC00 && ch <= 0xDFFF;
    }

    /**
     * Converts a single character to HTML.
     * @param ch one character String
     * @return the entity registered for that character, or <code>null</code> if there is none
     */
    private static String encodeSingleChar(String ch)
    {
        return (String) entityTableEncode.get(ch);
    }

    /**
     * URL-encodes a query parameter name or value, using UTF-8 through the J2SE 1.4 encoder when it is available
     * and the single-argument J2SE 1.3 encoder otherwise.
     * @param url text to be encoded, may be <code>null</code>
     * @return encoded text, an empty String if <code>url</code> is <code>null</code> (same convention as
     * {@link #encode(String)})
     * @throws RuntimeException if the reflective call to the 1.4 encoder fails
     */
    protected static String encodeHREFParam(String url)
    {
        if (url == null)
        {
            return "";
        }

        if (encodeMethod14 != null)
        {
            try
            {
                return (String) encodeMethod14.invoke(null, new Object[]{url, "UTF8"});
            }
            catch (java.lang.reflect.InvocationTargetException e)
            {
                // The wrapper itself carries no message; report what the encoder actually threw. The cause is
                // folded into the message because the exception-chaining constructor does not exist in the
                // J2SE 1.3 API this class is written against.
                throw new RuntimeException(REFLECTION_ERROR + e.getTargetException());
            }
            catch (Exception e)
            {
                throw new RuntimeException(REFLECTION_ERROR + e);
            }
        }

        // must use J2SE 1.3 version
        return encodeHREFParamJava13(url);

    }

    /**
     * URL-encodes a value with the single-argument J2SE 1.3 <code>URLEncoder.encode</code>.
     * @param value text to be encoded
     * @return encoded text
     */
    protected static String encodeHREFParamJava13(String value)
    {
        return URLEncoder.encode(value);
    }

    /**
     * Builds <code>url?name=value&amp;name=value...</code> with a plain <code>&amp;</code> between parameters.
     * @param url text placed before the query string, appended without encoding
     * @param args alternating parameter names and values, may be <code>null</code> or empty
     * @return url followed by the encoded query string
     */
    public static String encodeQuery(String url, String[] args)
    {
        return encodeHREFQuery(url, args, false);
    }

    /**
     * Builds a query URL for use inside HTML: parameters are separated by the <code>&amp;amp;</code> entity.
     * @param url text placed before the query string, appended without encoding
     * @param args alternating parameter names and values, may be <code>null</code> or empty
     * @return url followed by the encoded query string
     */
    public static String encodeHREFQuery(String url, String[] args)
    {
        return encodeHREFQuery(url, args, true);
    }

    /**
     * Builds a query URL from alternating parameter names and values. A trailing name without a value is written
     * without the <code>=</code> sign.
     * @param url text placed before the query string, appended without encoding
     * @param args alternating parameter names and values, may be <code>null</code> or empty
     * @param forHtml <code>true</code> to separate parameters with the <code>&amp;amp;</code> entity,
     * <code>false</code> to use a plain <code>&amp;</code>
     * @return url followed by the encoded query string; just url when there are no parameters
     */
    public static String encodeHREFQuery(String url, String[] args, boolean forHtml)
    {
        StringBuffer out = new StringBuffer(128);
        out.append(url);

        if ((args != null) && (args.length > 0))
        {
            out.append("?");
            for (int k = 0; k < args.length; k += 2)
            {
                if (k != 0)
                {
                    appendSeparator(out, forHtml);
                }
                out.append(encodeHREFParam(args[k]));
                if (k + 1 < args.length)
                {
                    out.append("=");
                    out.append(encodeHREFParam(args[k + 1]));
                }
            }
        }
        return out.toString();
    }

    /**
     * Builds a query URL from a map of parameter names to values, in the iteration order of the map.
     * @param url text placed before the query string, appended without encoding
     * @param args parameter names mapped to values (both Strings), may be <code>null</code> or empty
     * @param forHtml <code>true</code> to separate parameters with the <code>&amp;amp;</code> entity,
     * <code>false</code> to use a plain <code>&amp;</code>
     * @return url followed by the encoded query string; just url when there are no parameters
     */
    public static String encodeHREFQuery(String url, Map args, boolean forHtml)
    {
        StringBuffer out = new StringBuffer(128);
        out.append(url);

        if ((args != null) && (args.size() > 0))
        {
            out.append("?");
            boolean first = true;
            for (Iterator i = args.entrySet().iterator(); i.hasNext();)
            {
                Map.Entry entry = (Map.Entry) i.next();
                if (!first)
                {
                    appendSeparator(out, forHtml);
                }
                out.append(encodeHREFParam((String) entry.getKey()));
                out.append("=");
                out.append(encodeHREFParam((String) entry.getValue()));
                first = false;
            }
        }
        return out.toString();
    }

    /**
     * Appends the separator written between two query parameters.
     */
    private static void appendSeparator(StringBuffer out, boolean forHtml)
    {
        out.append(forHtml ? "&amp;" : "&");
    }
}