Clover coverage report - JTidy - r8-SNAPSHOT
Coverage timestamp: Tue Jan 4 2005 09:35:24 PST
file stats: LOC: 806   Methods: 11
NCLOC: 497   Classes: 1
 
 Source file Conditionals Statements Methods TOTAL
StreamInImpl.java 23% 30.6% 45.5% 28.6%
coverage coverage
 1    /*
 2    * Java HTML Tidy - JTidy
 3    * HTML parser and pretty printer
 4    *
 5    * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 6    * Institute of Technology, Institut National de Recherche en
 7    * Informatique et en Automatique, Keio University). All Rights
 8    * Reserved.
 9    *
 10    * Contributing Author(s):
 11    *
 12    * Dave Raggett <dsr@w3.org>
 13    * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
 14    * Gary L Peskin <garyp@firstech.com> (Java development)
 15    * Sami Lempinen <sami@lempinen.net> (release management)
 16    * Fabrizio Giustina <fgiust at users.sourceforge.net>
 17    *
 18    * The contributing author(s) would like to thank all those who
 19    * helped with testing, bug fixes, and patience. This wouldn't
 20    * have been possible without all of you.
 21    *
 22    * COPYRIGHT NOTICE:
 23    *
 24    * This software and documentation is provided "as is," and
 25    * the copyright holders and contributing author(s) make no
 26    * representations or warranties, express or implied, including
 27    * but not limited to, warranties of merchantability or fitness
 28    * for any particular purpose or that the use of the software or
 29    * documentation will not infringe any third party patents,
 30    * copyrights, trademarks or other rights.
 31    *
 32    * The copyright holders and contributing author(s) will not be
 33    * liable for any direct, indirect, special or consequential damages
 34    * arising out of any use of the software or documentation, even if
 35    * advised of the possibility of such damage.
 36    *
 37    * Permission is hereby granted to use, copy, modify, and distribute
 38    * this source code, or portions hereof, documentation and executables,
 39    * for any purpose, without fee, subject to the following restrictions:
 40    *
 41    * 1. The origin of this source code must not be misrepresented.
 42    * 2. Altered versions must be plainly marked as such and must
 43    * not be misrepresented as being the original source.
 44    * 3. This Copyright notice may not be removed or altered from any
 45    * source or altered source distribution.
 46    *
 47    * The copyright holders and contributing author(s) specifically
 48    * permit, without fee, and encourage the use of this source code
 49    * as a component for supporting the Hypertext Markup Language in
 50    * commercial products. If you use this source code in a product,
 51    * acknowledgment is not required but would be appreciated.
 52    *
 53    */
 54    package org.w3c.tidy;
 55   
 56    import java.io.IOException;
 57    import java.io.InputStream;
 58   
 59    import org.w3c.tidy.EncodingUtils.GetBytes;
 60   
 61   
 62    /**
 63    * Input Stream Implementation. This implementation is from the c version of tidy and it doesn't take advantage of java
 64    * readers.
 65    * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
 66    * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
 67    * @author Fabrizio Giustina
 68    * @version $Revision: 1.28 $ ($Author: fgiust $)
 69    */
 70    public class StreamInImpl implements StreamIn
 71    {
 72   
 73    /**
 74    * number of characters kept in buffer.
 75    */
 76    private static final int CHARBUF_SIZE = 5;
 77   
 78    /**
 79    * needed for error reporting.
 80    */
 81    private Lexer lexer;
 82   
 83    /**
 84    * character buffer.
 85    */
 86    private int[] charbuf = new int[CHARBUF_SIZE];
 87   
 88    /**
 89    * actual position in buffer.
 90    */
 91    private int bufpos;
 92   
 93    /**
 94    * Private unget buffer for the raw bytes read from the input stream. Normally this will only be used by the UTF-8
 95    * decoder to resynchronize the input stream after finding an illegal UTF-8 sequences. But it can be used for other
 96    * purposes when reading bytes in ReadCharFromStream.
 97    */
 98    private char[] rawBytebuf = new char[CHARBUF_SIZE];
 99   
 100    /**
 101    * actual position in rawBytebuf.
 102    */
 103    private int rawBufpos;
 104   
 105    /**
 106    * has a raw byte been pushed into stack?
 107    */
 108    private boolean rawPushed;
 109   
 110    /**
 111    * looking for an UTF BOM?
 112    */
 113    private boolean lookingForBOM = true;
 114   
 115    /**
 116    * has end of stream been reached?
 117    */
 118    private boolean endOfStream;
 119   
 120    private boolean pushed;
 121   
 122    private int tabs;
 123   
 124    /**
 125    * tab size in chars.
 126    */
 127    private int tabsize;
 128   
 129    /**
 130    * FSM for ISO2022.
 131    */
 132    private int state;
 133   
 134    /**
 135    * Encoding.
 136    */
 137    private int encoding;
 138   
 139    /**
 140    * current column number.
 141    */
 142    private int curcol;
 143   
 144    /**
 145    * last column.
 146    */
 147    private int lastcol;
 148   
 149    /**
 150    * current line number.
 151    */
 152    private int curline;
 153   
 154    /**
 155    * input stream.
 156    */
 157    private InputStream stream;
 158   
 159    /**
 160    * Getter.
 161    */
 162    private GetBytes getBytes;
 163   
 164    /**
 165    * Avoid mapping values > 127 to entities.
 166    */
 167    private boolean rawOut;
 168   
 169    /**
 170    * Instatiates a new StreamInImpl.
 171    * @param stream input stream
 172    * @param configuration Configuration
 173    */
 174  8 public StreamInImpl(InputStream stream, Configuration configuration)
 175    {
 176  8 this.stream = stream;
 177  8 this.charbuf[0] = '\0';
 178  8 this.tabsize = configuration.tabsize;
 179  8 this.curline = 1;
 180  8 this.curcol = 1;
 181  8 this.encoding = configuration.getInCharEncoding();
 182  8 this.rawOut = configuration.rawOut;
 183  8 this.state = EncodingUtils.FSM_ASCII;
 184  8 this.getBytes = new GetBytes()
 185    {
 186   
 187    StreamInImpl in;
 188   
 189  8 GetBytes setStreamIn(StreamInImpl in)
 190    {
 191  8 this.in = in;
 192  8 return this;
 193    }
 194   
 195  0 public void doGet(int[] buf, int[] count, boolean unget)
 196    {
 197  0 in.readRawBytesFromStream(buf, count, unget);
 198    }
 199    } // set the StreamInImpl instance directly
 200    .setStreamIn(this);
 201    }
 202   
 203    /**
 204    * @see org.w3c.tidy.StreamIn#getCurcol()
 205    */
 206  0 public int getCurcol()
 207    {
 208  0 return this.curcol;
 209    }
 210   
 211    /**
 212    * @see org.w3c.tidy.StreamIn#getCurline()
 213    */
 214  0 public int getCurline()
 215    {
 216  0 return this.curline;
 217    }
 218   
 219    /**
 220    * Setter for <code>lexer</code>.
 221    * @param lexer The lexer to set.
 222    */
 223  8 public void setLexer(Lexer lexer)
 224    {
 225  8 this.lexer = lexer;
 226    }
 227   
 228    /**
 229    * @see org.w3c.tidy.StreamIn#readChar()
 230    */
 231  0 public int readChar()
 232    {
 233  0 int c;
 234   
 235  0 if (this.pushed)
 236    {
 237  0 c = this.charbuf[--(this.bufpos)];
 238  0 if ((this.bufpos) == 0)
 239    {
 240  0 this.pushed = false;
 241    }
 242   
 243  0 if (c == '\n')
 244    {
 245  0 this.curcol = 1;
 246  0 this.curline++;
 247    }
 248    else
 249    {
 250  0 this.curcol++;
 251    }
 252   
 253  0 return c;
 254    }
 255   
 256  0 this.lastcol = this.curcol;
 257   
 258  0 if (this.tabs > 0)
 259    {
 260  0 this.curcol++;
 261  0 this.tabs--;
 262  0 return ' ';
 263    }
 264   
 265  0 while (true)
 266    {
 267  0 c = readCharFromStream();
 268   
 269  0 if (c < 0)
 270    {
 271  0 return END_OF_STREAM;
 272    }
 273   
 274  0 if (c == '\n')
 275    {
 276  0 this.curcol = 1;
 277  0 this.curline++;
 278  0 break;
 279    }
 280   
 281    // #427663 - map '\r' to '\n' - Andy Quick 11 Aug 00
 282  0 if (c == '\r')
 283    {
 284  0 c = readCharFromStream();
 285  0 if (c != '\n')
 286    {
 287  0 if (c != END_OF_STREAM) // EOF fix by Terry Teague 12 Aug 01
 288    {
 289  0 ungetChar(c);
 290    }
 291  0 c = '\n';
 292    }
 293  0 this.curcol = 1;
 294  0 this.curline++;
 295  0 break;
 296    }
 297   
 298  0 if (c == '\t')
 299    {
 300  0 this.tabs = this.tabsize - ((this.curcol - 1) % this.tabsize) - 1;
 301  0 this.curcol++;
 302  0 c = ' ';
 303  0 break;
 304    }
 305   
 306    // strip control characters, except for Esc
 307  0 if (c == '\033')
 308    {
 309  0 break;
 310    }
 311  0 else if (c == '\015' && !lexer.configuration.xmlTags) //Form Feed is allowed in HTML
 312    {
 313  0 break;
 314    }
 315  0 else if (0 < c && c < 32)
 316    {
 317  0 continue; // discard control char
 318    }
 319   
 320    // watch out for chars that have already been decoded such as
 321    // IS02022, UTF-8 etc, that don't require further decoding
 322  0 if (rawOut
 323    || this.encoding == Configuration.ISO2022
 324    || this.encoding == Configuration.UTF8
 325    || this.encoding == Configuration.SHIFTJIS // #431953 - RJ
 326    || this.encoding == Configuration.BIG5) // #431953 - RJ
 327    {
 328  0 this.curcol++;
 329  0 break;
 330    }
 331   
 332    // handle surrogate pairs
 333  0 if ((this.encoding == Configuration.UTF16LE)
 334    || (this.encoding == Configuration.UTF16)
 335    || (this.encoding == Configuration.UTF16BE))
 336    {
 337  0 if (c > EncodingUtils.MAX_UTF8_FROM_UCS4)
 338    {
 339    // invalid UTF-16 value
 340  0 this.lexer.report.encodingError(this.lexer, Report.INVALID_UTF16 | Report.DISCARDED_CHAR, c);
 341  0 c = 0;
 342    }
 343    // high surrogate
 344  0 else if (c >= EncodingUtils.UTF16_LOW_SURROGATE_BEGIN && c <= EncodingUtils.UTF16_LOW_SURROGATE_END)
 345    {
 346  0 int n, m;
 347   
 348  0 n = c;
 349   
 350  0 m = readCharFromStream();
 351  0 if (m < 0)
 352    {
 353  0 return END_OF_STREAM;
 354    }
 355    // low surrogate
 356  0 if (m >= EncodingUtils.UTF16_HIGH_SURROGATE_BEGIN && m <= EncodingUtils.UTF16_HIGH_SURROGATE_END)
 357    {
 358    // pair found, recombine them
 359  0 c = (n - EncodingUtils.UTF16_LOW_SURROGATE_BEGIN)
 360    * 0x400
 361    + (m - EncodingUtils.UTF16_HIGH_SURROGATE_BEGIN)
 362    + 0x10000;
 363   
 364    // check for invalid pairs
 365  0 if (((c & 0x0000FFFE) == 0x0000FFFE)
 366    || ((c & 0x0000FFFF) == 0x0000FFFF)
 367    || (c < EncodingUtils.UTF16_SURROGATES_BEGIN))
 368    {
 369  0 this.lexer.report
 370    .encodingError(this.lexer, Report.INVALID_UTF16 | Report.DISCARDED_CHAR, c);
 371  0 c = 0;
 372    }
 373    }
 374    else
 375    {
 376    // not a valid pair
 377  0 this.lexer.report.encodingError(this.lexer, Report.INVALID_UTF16 | Report.DISCARDED_CHAR, c);
 378  0 c = 0;
 379    // should we unget the just read char?
 380    }
 381    }
 382    else
 383    {
 384    // no recombination needed
 385    }
 386    }
 387   
 388  0 if (this.encoding == Configuration.MACROMAN)
 389    {
 390  0 c = EncodingUtils.decodeMacRoman(c);
 391    }
 392   
 393    // produced e.g. as a side-effect of smart quotes in Word
 394    // but can't happen if using MACROMAN encoding
 395  0 if (127 < c && c < 160)
 396    {
 397  0 int c1 = 0;
 398  0 int replaceMode;
 399   
 400    // set error position just before offending character
 401  0 this.lexer.lines = this.curline;
 402  0 this.lexer.columns = this.curcol;
 403   
 404  0 if ((this.encoding == Configuration.WIN1252)
 405    || (this.lexer.configuration.replacementCharEncoding == Configuration.WIN1252))
 406    {
 407  0 c1 = EncodingUtils.decodeWin1252(c);
 408    }
 409  0 else if (this.lexer.configuration.replacementCharEncoding == Configuration.MACROMAN)
 410    {
 411  0 c1 = EncodingUtils.decodeMacRoman(c);
 412    }
 413   
 414  0 replaceMode = TidyUtils.toBoolean(c1) ? Report.REPLACED_CHAR : Report.DISCARDED_CHAR;
 415   
 416  0 if ((c1 == 0) && (this.encoding == Configuration.WIN1252) || (this.encoding == Configuration.MACROMAN))
 417    {
 418  0 this.lexer.report.encodingError(this.lexer, Report.VENDOR_SPECIFIC_CHARS | replaceMode, c);
 419    }
 420  0 else if ((this.encoding != Configuration.WIN1252) && (this.encoding != Configuration.MACROMAN))
 421    {
 422  0 this.lexer.report.encodingError(this.lexer, Report.INVALID_SGML_CHARS | replaceMode, c);
 423    }
 424   
 425  0 c = c1;
 426    }
 427   
 428  0 if (c == 0)
 429    {
 430  0 continue; // illegal char is discarded
 431    }
 432   
 433  0 this.curcol++;
 434  0 break;
 435    }
 436   
 437  0 return c;
 438    }
 439   
 440    /**
 441    * @see org.w3c.tidy.StreamIn#ungetChar(int)
 442    */
 443  0 public void ungetChar(int c)
 444    {
 445  0 this.pushed = true;
 446  0 if (this.bufpos >= CHARBUF_SIZE)
 447    {
 448    // pop last element
 449  0 System.arraycopy(this.charbuf, 0, this.charbuf, 1, CHARBUF_SIZE - 1);
 450  0 this.bufpos--;
 451    }
 452  0 this.charbuf[(this.bufpos)++] = c;
 453   
 454  0 if (c == '\n')
 455    {
 456  0 --this.curline;
 457    }
 458   
 459  0 this.curcol = this.lastcol;
 460    }
 461   
 462    /**
 463    * @see org.w3c.tidy.StreamIn#isEndOfStream()
 464    */
 465  0 public boolean isEndOfStream()
 466    {
 467  0 return this.endOfStream;
 468    }
 469   
 470    /**
 471    * @see org.w3c.tidy.StreamIn#readCharFromStream()
 472    */
 473  8 public int readCharFromStream()
 474    {
 475  8 int c;
 476  8 int[] n = new int[]{0};
 477  8 int[] tempchar = new int[1];
 478  8 int[] count = new int[]{1};
 479   
 480  8 readRawBytesFromStream(tempchar, count, false);
 481  8 if (count[0] <= 0)
 482    {
 483  0 endOfStream = true;
 484  0 return END_OF_STREAM;
 485    }
 486   
 487  8 c = tempchar[0];
 488   
 489  8 if (lookingForBOM
 490    && (this.encoding == Configuration.UTF16
 491    || this.encoding == Configuration.UTF16LE
 492    || this.encoding == Configuration.UTF16BE || this.encoding == Configuration.UTF8))
 493    {
 494    // check for a Byte Order Mark
 495  3 int c1, bom;
 496   
 497  3 lookingForBOM = false;
 498   
 499  3 if (c == END_OF_STREAM)
 500    {
 501  0 lookingForBOM = false;
 502  0 endOfStream = true;
 503  0 return END_OF_STREAM;
 504    }
 505   
 506  3 count[0] = 1;
 507  3 readRawBytesFromStream(tempchar, count, false);
 508  3 c1 = tempchar[0];
 509   
 510  3 bom = (c << 8) + c1;
 511   
 512  3 if (bom == EncodingUtils.UNICODE_BOM_BE)
 513    {
 514    // big-endian UTF-16
 515  1 if (this.encoding != Configuration.UTF16 && this.encoding != Configuration.UTF16BE)
 516    {
 517  0 this.lexer.report.encodingError(this.lexer, Report.ENCODING_MISMATCH, Configuration.UTF16BE);
 518    // non-fatal error
 519    }
 520  1 this.encoding = Configuration.UTF16BE;
 521  1 this.lexer.configuration.setInCharEncoding(Configuration.UTF16BE);
 522  1 return EncodingUtils.UNICODE_BOM; // return decoded BOM
 523    }
 524  2 else if (bom == EncodingUtils.UNICODE_BOM_LE)
 525    {
 526    // little-endian UTF-16
 527  1 if (this.encoding != Configuration.UTF16 && this.encoding != Configuration.UTF16LE)
 528    {
 529  0 this.lexer.report.encodingError(this.lexer, Report.ENCODING_MISMATCH, Configuration.UTF16LE);
 530    // non-fatal error
 531    }
 532  1 this.encoding = Configuration.UTF16LE;
 533  1 this.lexer.configuration.setInCharEncoding(Configuration.UTF16LE);
 534  1 return EncodingUtils.UNICODE_BOM; // return decoded BOM
 535    }
 536    else
 537    {
 538  1 int c2;
 539   
 540  1 count[0] = 1;
 541  1 readRawBytesFromStream(tempchar, count, false);
 542  1 c2 = tempchar[0];
 543   
 544  1 if (((c << 16) + (c1 << 8) + c2) == EncodingUtils.UNICODE_BOM_UTF8)
 545    {
 546    // UTF-8
 547  0 this.encoding = Configuration.UTF8;
 548  0 if (this.encoding != Configuration.UTF8)
 549    {
 550  0 this.lexer.report.encodingError(this.lexer, Report.ENCODING_MISMATCH, Configuration.UTF8);
 551    // non-fatal error
 552    }
 553  0 this.lexer.configuration.setInCharEncoding(Configuration.UTF8);
 554  0 return EncodingUtils.UNICODE_BOM; // return decoded BOM
 555    }
 556   
 557    // the 2nd and/or 3rd bytes weren't what we were expecting, so unget the extra 2 bytes
 558  1 rawPushed = true;
 559   
 560  1 if ((rawBufpos + 1) >= CHARBUF_SIZE)
 561    {
 562  0 System.arraycopy(rawBytebuf, 2, rawBytebuf, 0, CHARBUF_SIZE - 2);
 563  0 rawBufpos -= 2;
 564    }
 565    // make sure the bytes are pushed in the right order
 566  1 rawBytebuf[rawBufpos++] = (char) c2;
 567  1 rawBytebuf[rawBufpos++] = (char) c1;
 568    // drop through to code below, with the original char
 569   
 570    }
 571    }
 572   
 573  6 this.lookingForBOM = false;
 574   
 575    // A document in ISO-2022 based encoding uses some ESC sequences called "designator" to switch character sets.
 576    // The designators defined and used in ISO-2022-JP are: "ESC" + "(" + ? for ISO646 variants "ESC" + "$" + ? and
 577    // "ESC" + "$" + "(" + ? for multibyte character sets Where ? stands for a single character used to indicate the
 578    // character set for multibyte characters. Tidy handles this by preserving the escape sequence and setting the
 579    // top bit of each byte for non-ascii chars. This bit is then cleared on output. The input stream keeps track of
 580    // the state to determine when to set/clear the bit.
 581   
 582  6 if (this.encoding == Configuration.ISO2022)
 583    {
 584  0 if (c == 0x1b) // ESC
 585    {
 586  0 this.state = EncodingUtils.FSM_ESC;
 587  0 return c;
 588    }
 589   
 590  0 switch (this.state)
 591    {
 592  0 case EncodingUtils.FSM_ESC :
 593  0 if (c == '$')
 594    {
 595  0 this.state = EncodingUtils.FSM_ESCD;
 596    }
 597  0 else if (c == '(')
 598    {
 599  0 this.state = EncodingUtils.FSM_ESCP;
 600    }
 601    else
 602    {
 603  0 this.state = EncodingUtils.FSM_ASCII;
 604    }
 605  0 break;
 606   
 607  0 case EncodingUtils.FSM_ESCD :
 608  0 if (c == '(')
 609    {
 610  0 this.state = EncodingUtils.FSM_ESCDP;
 611    }
 612    else
 613    {
 614  0 this.state = EncodingUtils.FSM_NONASCII;
 615    }
 616  0 break;
 617   
 618  0 case EncodingUtils.FSM_ESCDP :
 619  0 this.state = EncodingUtils.FSM_NONASCII;
 620  0 break;
 621   
 622  0 case EncodingUtils.FSM_ESCP :
 623  0 this.state = EncodingUtils.FSM_ASCII;
 624  0 break;
 625   
 626  0 case EncodingUtils.FSM_NONASCII :
 627  0 c |= 0x80;
 628  0 break;
 629   
 630  0 default :
 631    //
 632  0 break;
 633    }
 634   
 635  0 return c;
 636    }
 637   
 638  6 if (this.encoding == Configuration.UTF16LE)
 639    {
 640  1 int c1;
 641   
 642  1 count[0] = 1;
 643  1 readRawBytesFromStream(tempchar, count, false);
 644  1 if (count[0] <= 0)
 645    {
 646  0 endOfStream = true;
 647  0 return END_OF_STREAM;
 648    }
 649  1 c1 = tempchar[0];
 650   
 651  1 n[0] = (c1 << 8) + c;
 652   
 653  1 return n[0];
 654    }
 655   
 656    // UTF-16 is big-endian by default
 657  5 if ((this.encoding == Configuration.UTF16) || (this.encoding == Configuration.UTF16BE))
 658    {
 659  3 int c1;
 660   
 661  3 count[0] = 1;
 662  3 readRawBytesFromStream(tempchar, count, false);
 663  3 if (count[0] <= 0)
 664    {
 665  0 endOfStream = true;
 666  0 return END_OF_STREAM;
 667    }
 668  3 c1 = tempchar[0];
 669   
 670  3 n[0] = (c << 8) + c1;
 671   
 672  3 return n[0];
 673    }
 674   
 675  2 if (this.encoding == Configuration.UTF8)
 676    {
 677    // deal with UTF-8 encoded char
 678  0 int[] count2 = new int[]{0};
 679   
 680    // first byte "c" is passed in separately
 681  0 boolean err = EncodingUtils.decodeUTF8BytesToChar(n, c, new byte[0], this.getBytes, count2, 0);
 682  0 if (!err && (n[0] == END_OF_STREAM) && (count2[0] == 1)) /* EOF */
 683    {
 684  0 endOfStream = true;
 685  0 return END_OF_STREAM;
 686    }
 687  0 else if (err)
 688    {
 689    /* set error position just before offending character */
 690  0 this.lexer.lines = this.curline;
 691  0 this.lexer.columns = this.curcol;
 692   
 693  0 this.lexer.report.encodingError(this.lexer, (short) (Report.INVALID_UTF8 | Report.REPLACED_CHAR), n[0]);
 694  0 n[0] = 0xFFFD; /* replacement char */
 695    }
 696   
 697  0 return n[0];
 698    }
 699   
 700    // #431953 - start RJ
 701    // This section is suitable for any "multibyte" variable-width character encoding in which a one-byte code is
 702    // less than 128, and the first byte of a two-byte code is greater or equal to 128. Note that Big5 and ShiftJIS
 703    // fit into this kind, even though their second byte may be less than 128
 704   
 705  2 if ((this.encoding == Configuration.BIG5) || (this.encoding == Configuration.SHIFTJIS))
 706    {
 707  0 if (c < 128)
 708    {
 709  0 return c;
 710    }
 711  0 else if ((this.encoding == Configuration.SHIFTJIS) && (c >= 0xa1 && c <= 0xdf))
 712    {
 713    // 461643 - fix suggested by Rick Cameron 14 Sep 01
 714    // for Shift_JIS, the values from 0xa1 through 0xdf represent singe-byte characters (U+FF61 to U+FF9F -
 715    // half-shift Katakana)
 716  0 return c;
 717    }
 718    else
 719    {
 720  0 int c1;
 721  0 count[0] = 1;
 722  0 readRawBytesFromStream(tempchar, count, false);
 723   
 724  0 if (count[0] <= 0)
 725    {
 726  0 endOfStream = true;
 727  0 return END_OF_STREAM;
 728    }
 729   
 730  0 c1 = tempchar[0];
 731  0 n[0] = (c << 8) + c1;
 732  0 return n[0];
 733    }
 734    }
 735    // #431953 - end RJ
 736  2 n[0] = c;
 737   
 738  2 return n[0];
 739    }
 740   
 741    /**
 742    * Read raw bytes from stream, return <= 0 if EOF; or if "unget" is true, Unget the bytes to re-synchronize the
 743    * input stream Normally UTF-8 successor bytes are read using this routine.
 744    * @param buf character buffer
 745    * @param count number of bytes to read
 746    * @param unget unget bytes
 747    */
 748  16 protected void readRawBytesFromStream(int[] buf, int[] count, boolean unget)
 749    {
 750   
 751  16 try
 752    {
 753  16 for (int i = 0; i < count[0]; i++)
 754    {
 755  16 if (unget)
 756    {
 757   
 758  0 int c = this.stream.read();
 759   
 760    // should never get here; testing for 0xFF, a valid char, is not a good idea
 761  0 if (c == END_OF_STREAM) // || buf[i] == (unsigned char)EndOfStream
 762    {
 763  0 count[0] = -i;
 764  0 return;
 765    }
 766   
 767  0 rawPushed = true;
 768   
 769  0 if (rawBufpos >= CHARBUF_SIZE)
 770    {
 771  0 System.arraycopy(rawBytebuf, 1, rawBytebuf, 0, CHARBUF_SIZE - 1);
 772  0 rawBufpos--;
 773    }
 774  0 rawBytebuf[rawBufpos++] = (char) buf[i];
 775    }
 776    else
 777    {
 778  16 if (rawPushed)
 779    {
 780  2 buf[i] = rawBytebuf[--rawBufpos];
 781  2 if (rawBufpos == 0)
 782    {
 783  1 rawPushed = false;
 784    }
 785    }
 786    else
 787    {
 788  14 int c = this.stream.read();
 789  14 if (c == END_OF_STREAM)
 790    {
 791  0 count[0] = -i;
 792  0 break;
 793    }
 794  14 buf[i] = (char) c;
 795    }
 796    }
 797    }
 798    }
 799    catch (IOException e)
 800    {
 801  0 System.err.println("StreamInImpl.readRawBytesFromStream: " + e.toString());
 802    }
 803  16 return;
 804    }
 805   
 806    }