View Javadoc

1   /*
2    *  Java HTML Tidy - JTidy
3    *  HTML parser and pretty printer
4    *
5    *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6    *  Institute of Technology, Institut National de Recherche en
7    *  Informatique et en Automatique, Keio University). All Rights
8    *  Reserved.
9    *
10   *  Contributing Author(s):
11   *
12   *     Dave Raggett <dsr@w3.org>
13   *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14   *     Gary L Peskin <garyp@firstech.com> (Java development)
15   *     Sami Lempinen <sami@lempinen.net> (release management)
16   *     Fabrizio Giustina <fgiust at users.sourceforge.net>
17   *
18   *  The contributing author(s) would like to thank all those who
19   *  helped with testing, bug fixes, and patience.  This wouldn't
20   *  have been possible without all of you.
21   *
22   *  COPYRIGHT NOTICE:
23   * 
24   *  This software and documentation is provided "as is," and
25   *  the copyright holders and contributing author(s) make no
26   *  representations or warranties, express or implied, including
27   *  but not limited to, warranties of merchantability or fitness
28   *  for any particular purpose or that the use of the software or
29   *  documentation will not infringe any third party patents,
30   *  copyrights, trademarks or other rights. 
31   *
32   *  The copyright holders and contributing author(s) will not be
33   *  liable for any direct, indirect, special or consequential damages
34   *  arising out of any use of the software or documentation, even if
35   *  advised of the possibility of such damage.
36   *
37   *  Permission is hereby granted to use, copy, modify, and distribute
38   *  this source code, or portions hereof, documentation and executables,
39   *  for any purpose, without fee, subject to the following restrictions:
40   *
41   *  1. The origin of this source code must not be misrepresented.
42   *  2. Altered versions must be plainly marked as such and must
43   *     not be misrepresented as being the original source.
44   *  3. This Copyright notice may not be removed or altered from any
45   *     source or altered source distribution.
46   * 
47   *  The copyright holders and contributing author(s) specifically
48   *  permit, without fee, and encourage the use of this source code
49   *  as a component for supporting the Hypertext Markup Language in
50   *  commercial products. If you use this source code in a product,
51   *  acknowledgment is not required but would be appreciated.
52   *
53   */
54  package org.w3c.tidy;
55  
56  import java.io.IOException;
57  import java.io.InputStream;
58  import java.io.InputStreamReader;
59  import java.io.Reader;
60  import java.io.UnsupportedEncodingException;
61  
62  
63  /**
64   * StreamIn Implementation using java writers.
65   * @author Fabrizio Giustina
66   * @version $Revision: 807 $ ($Author: fgiust $)
67   */
68  public class StreamInJavaImpl implements StreamIn
69  {
70  
71      /**
72       * number of characters kept in buffer.
73       */
74      private static final int CHARBUF_SIZE = 5;
75  
76      /**
77       * character buffer.
78       */
79      private int[] charbuf = new int[CHARBUF_SIZE];
80  
81      /**
82       * actual position in buffer.
83       */
84      private int bufpos;
85  
86      /**
87       * Java input stream reader.
88       */
89      private Reader reader;
90  
91      /**
92       * has end of stream been reached?
93       */
94      private boolean endOfStream;
95  
96      /**
97       * Is char pushed?
98       */
99      private boolean pushed;
100 
101     /**
102      * current column number.
103      */
104     private int curcol;
105 
106     /**
107      * last column.
108      */
109     private int lastcol;
110 
111     /**
112      * current line number.
113      */
114     private int curline;
115 
116     /**
117      * tab size in chars.
118      */
119     private int tabsize;
120 
121     private int tabs;
122 
123     /**
124      * Instantiates a new StreamInJavaImpl.
125      * @param stream
126      * @param encoding
127      * @param tabsize
128      * @throws UnsupportedEncodingException
129      */
130     protected StreamInJavaImpl(InputStream stream, String encoding, int tabsize) throws UnsupportedEncodingException
131     {
132         reader = new InputStreamReader(stream, encoding);
133         this.pushed = false;
134         this.tabsize = tabsize;
135         this.curline = 1;
136         this.curcol = 1;
137         this.endOfStream = false;
138     }
139 
140     /**
141      * Instantiates a new StreamInJavaImpl.
142      * @param stream
143      * @param encoding
144      * @param tabsize
145      */
146     protected StreamInJavaImpl(Reader reader, int tabsize)
147     {
148         this.reader = reader;
149         this.pushed = false;
150         this.tabsize = tabsize;
151         this.curline = 1;
152         this.curcol = 1;
153         this.endOfStream = false;
154     }
155 
156     /**
157      * @see org.w3c.tidy.StreamIn#readCharFromStream()
158      */
159     public int readCharFromStream()
160     {
161         int c;
162         try
163         {
164             c = reader.read();
165             if (c < 0)
166             {
167                 endOfStream = true;
168             }
169 
170         }
171         catch (IOException e)
172         {
173             // @todo how to handle?
174             endOfStream = true;
175             return END_OF_STREAM;
176         }
177 
178         return c;
179     }
180 
181     /**
182      * @see org.w3c.tidy.StreamIn#readChar()
183      */
184     public int readChar()
185     {
186         int c;
187 
188         if (this.pushed)
189         {
190             c = this.charbuf[--(this.bufpos)];
191             if ((this.bufpos) == 0)
192             {
193                 this.pushed = false;
194             }
195 
196             if (c == '\n')
197             {
198                 this.curcol = 1;
199                 this.curline++;
200                 return c;
201             }
202 
203             this.curcol++;
204             return c;
205         }
206 
207         this.lastcol = this.curcol;
208 
209         if (this.tabs > 0)
210         {
211             this.curcol++;
212             this.tabs--;
213             return ' ';
214         }
215 
216         c = readCharFromStream();
217 
218         if (c < 0)
219         {
220             endOfStream = true;
221             return END_OF_STREAM;
222         }
223 
224         if (c == '\n')
225         {
226             this.curcol = 1;
227             this.curline++;
228             return c;
229         }
230         else if (c == '\r') // \r\n
231         {
232             c = readCharFromStream();
233             if (c != '\n')
234             {
235                 if (c != END_OF_STREAM)
236                 {
237                     ungetChar(c);
238                 }
239                 c = '\n';
240             }
241             this.curcol = 1;
242             this.curline++;
243             return c;
244         }
245 
246         if (c == '\t')
247         {
248             this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1;
249             this.curcol++;
250             c = ' ';
251             return c;
252         }
253 
254         this.curcol++;
255 
256         return c;
257     }
258 
259     /**
260      * @see org.w3c.tidy.StreamIn#ungetChar(int)
261      */
262     public void ungetChar(int c)
263     {
264         this.pushed = true;
265         if (this.bufpos >= CHARBUF_SIZE)
266         {
267             // pop last element
268             System.arraycopy(this.charbuf, 0, this.charbuf, 1, CHARBUF_SIZE - 1);
269             this.bufpos--;
270         }
271         this.charbuf[(this.bufpos)++] = c;
272 
273         if (c == '\n')
274         {
275             --this.curline;
276         }
277 
278         this.curcol = this.lastcol;
279     }
280 
281     /**
282      * @see org.w3c.tidy.StreamIn#isEndOfStream()
283      */
284     public boolean isEndOfStream()
285     {
286         return endOfStream;
287     }
288 
289     /**
290      * Getter for <code>curcol</code>.
291      * @return Returns the curcol.
292      */
293     public int getCurcol()
294     {
295         return this.curcol;
296     }
297 
298     /**
299      * Getter for <code>curline</code>.
300      * @return Returns the curline.
301      */
302     public int getCurline()
303     {
304         return this.curline;
305     }
306 
307     /**
308      * @see org.w3c.tidy.StreamIn#setLexer(org.w3c.tidy.Lexer)
309      */
310     public void setLexer(Lexer lexer)
311     {
312         // unused in the java implementation
313     }
314 
315 }