View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  import java.io.IOException;
57  import java.io.InputStream;
58  import java.io.InputStreamReader;
59  import java.io.Reader;
60  import java.io.UnsupportedEncodingException;
61  
62  
63  /***
64   * StreamIn Implementation using java writers.
65   * @author Fabrizio Giustina
66   * @version $Revision: 1.5 $ ($Author: fgiust $)
67   */
68  public class StreamInJavaImpl implements StreamIn
69  {
70  
71      /***
72       * number of characters kept in buffer.
73       */
74      private static final int CHARBUF_SIZE = 5;
75  
76      /***
77       * character buffer.
78       */
79      private int[] charbuf = new int[CHARBUF_SIZE];
80  
81      /***
82       * actual position in buffer.
83       */
84      private int bufpos;
85  
86      /***
87       * Java input stream reader.
88       */
89      private Reader reader;
90  
91      /***
92       * has end of stream been reached?
93       */
94      private boolean endOfStream;
95  
96      /***
97       * Is char pushed?
98       */
99      private boolean pushed;
100 
101     /***
102      * current column number.
103      */
104     private int curcol;
105 
106     /***
107      * last column.
108      */
109     private int lastcol;
110 
111     /***
112      * current line number.
113      */
114     private int curline;
115 
116     /***
117      * tab size in chars.
118      */
119     private int tabsize;
120 
121     private int tabs;
122 
123     /***
124      * Instantiates a new StreamInJavaImpl.
125      * @param stream
126      * @param encoding
127      * @param tabsize
128      * @throws UnsupportedEncodingException
129      */
130     public StreamInJavaImpl(InputStream stream, String encoding, int tabsize) throws UnsupportedEncodingException
131     {
132         reader = new InputStreamReader(stream, encoding);
133         this.pushed = false;
134         this.tabsize = tabsize;
135         this.curline = 1;
136         this.curcol = 1;
137         this.endOfStream = false;
138     }
139 
140     /***
141      * @see org.w3c.tidy.StreamIn#readCharFromStream()
142      */
143     public int readCharFromStream()
144     {
145         int c;
146         try
147         {
148             c = reader.read();
149             if (c < 0)
150             {
151                 endOfStream = true;
152             }
153 
154         }
155         catch (IOException e)
156         {
157             // @todo how to handle?
158             endOfStream = true;
159             return END_OF_STREAM;
160         }
161 
162         return c;
163     }
164 
165     /***
166      * @see org.w3c.tidy.StreamIn#readChar()
167      */
168     public int readChar()
169     {
170         int c;
171 
172         if (this.pushed)
173         {
174             c = this.charbuf[--(this.bufpos)];
175             if ((this.bufpos) == 0)
176             {
177                 this.pushed = false;
178             }
179 
180             if (c == '\n')
181             {
182                 this.curcol = 1;
183                 this.curline++;
184                 return c;
185             }
186 
187             this.curcol++;
188             return c;
189         }
190 
191         this.lastcol = this.curcol;
192 
193         if (this.tabs > 0)
194         {
195             this.curcol++;
196             this.tabs--;
197             return ' ';
198         }
199 
200         c = readCharFromStream();
201 
202         if (c < 0)
203         {
204             endOfStream = true;
205             return END_OF_STREAM;
206         }
207 
208         if (c == '\n')
209         {
210             this.curcol = 1;
211             this.curline++;
212             return c;
213         }
214         else if (c == '\r') // \r\n
215         {
216             c = readCharFromStream();
217             if (c != '\n')
218             {
219                 if (c != END_OF_STREAM)
220                 {
221                     ungetChar(c);
222                 }
223                 c = '\n';
224             }
225             this.curcol = 1;
226             this.curline++;
227             return c;
228         }
229 
230         if (c == '\t')
231         {
232             this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1;
233             this.curcol++;
234             c = ' ';
235             return c;
236         }
237 
238         this.curcol++;
239 
240         return c;
241     }
242 
243     /***
244      * @see org.w3c.tidy.StreamIn#ungetChar(int)
245      */
246     public void ungetChar(int c)
247     {
248         this.pushed = true;
249         if (this.bufpos >= CHARBUF_SIZE)
250         {
251             // pop last element
252             System.arraycopy(this.charbuf, 0, this.charbuf, 1, CHARBUF_SIZE - 1);
253             this.bufpos--;
254         }
255         this.charbuf[(this.bufpos)++] = c;
256 
257         if (c == '\n')
258         {
259             --this.curline;
260         }
261 
262         this.curcol = this.lastcol;
263     }
264 
265     /***
266      * @see org.w3c.tidy.StreamIn#isEndOfStream()
267      */
268     public boolean isEndOfStream()
269     {
270         return endOfStream;
271     }
272 
273     /***
274      * Getter for <code>curcol</code>.
275      * @return Returns the curcol.
276      */
277     public int getCurcol()
278     {
279         return this.curcol;
280     }
281 
282     /***
283      * Getter for <code>curline</code>.
284      * @return Returns the curline.
285      */
286     public int getCurline()
287     {
288         return this.curline;
289     }
290 
291     /***
292      * @see org.w3c.tidy.StreamIn#setLexer(org.w3c.tidy.Lexer)
293      */
294     public void setLexer(Lexer lexer)
295     {
296         // unused in the java implementation
297     }
298 
299 }