/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.util.Objects;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetEncoders;
import org.apache.commons.io.function.Uncheck;

/**
 * Implements an {@link InputStream} to read from String, StringBuffer, StringBuilder or CharBuffer.
 * <p>
 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
 * </p>
 *
 * @since 2.2
 */
public class CharSequenceInputStream extends InputStream {

    /**
     * Builds a new {@link CharSequenceInputStream} instance.
     * <p>
     * For example:
     * </p>
     * <h2>Using a Charset</h2>
     * <pre>{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharset(Charset.defaultCharset())
     *   .get();}
     * </pre>
     * <h2>Using a CharsetEncoder</h2>
     * <pre>{@code
     * CharSequenceInputStream s = CharSequenceInputStream.builder()
     *   .setBufferSize(8192)
     *   .setCharSequence("String")
     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
     *     .onMalformedInput(CodingErrorAction.REPLACE)
     *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
     *   .get();}
     * </pre>
     *
     * @since 2.13.0
     */
    public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {

        /** The encoder passed to built streams; both setters below keep it in step with the builder's charset. */
        private CharsetEncoder charsetEncoder = newEncoder(getCharset());

        /**
         * Constructs a new instance.
         * <p>
         * This builder uses the following aspects: the CharSequence, the buffer size, and the CharsetEncoder.
         * </p>
         * <p>
         * The builder's current encoder instance is passed to the new stream as-is (it is not copied).
         * </p>
         *
         * @return a new instance.
         * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
         */
        @Override
        public CharSequenceInputStream get() {
            return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder));
        }

        /**
         * Gets the charset encoder that will be passed to built streams.
         *
         * @return the charset encoder.
         */
        CharsetEncoder getCharsetEncoder() {
            return charsetEncoder;
        }

        /**
         * Sets the charset and replaces the current encoder with a new encoder for that charset which replaces
         * malformed input and unmappable characters.
         *
         * @param charset the charset to use.
         * @return this
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            // Read the charset back from the superclass so the encoder matches whatever it actually stored.
            charsetEncoder = newEncoder(getCharset());
            return this;
        }

        /**
         * Sets the charset encoder. Assumes that the caller has configured the encoder.
         * <p>
         * The builder's charset is updated to the charset of the resulting encoder.
         * </p>
         *
         * @param newEncoder the charset encoder.
         * @return this
         * @since 2.13.0
         */
        public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
            // NOTE(review): the supplier is presumably the fallback used when newEncoder is null - confirm against
            // CharsetEncoders.toCharsetEncoder.
            charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
            super.setCharset(charsetEncoder.charset());
            return this;
        }

    }

    /** Sentinel stored in {@link #cBufMark} and {@link #bBufMark} when no mark is set. */
    private static final int NO_MARK = -1;

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Creates an encoder for the given charset that replaces malformed input and unmappable characters instead of
     * reporting them as errors.
     *
     * @param charset the charset to encode with; it is first passed through {@link Charsets#toCharset(Charset)}.
     * @return a new, configured encoder.
     */
    private static CharsetEncoder newEncoder(final Charset charset) {
        // @formatter:off
        return Charsets.toCharset(charset).newEncoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);
        // @formatter:on
    }

    /** Holds encoded bytes that have not been read yet; kept in "read mode" (flipped) between calls. */
    private final ByteBuffer bBuf;

    /** Position in {@link #bBuf} at the time of the last {@link #mark(int)}, or {@link #NO_MARK}. */
    private int bBufMark; // position in bBuf

    /** Wraps the input character sequence; its position tracks how many chars have been encoded. */
    private final CharBuffer cBuf;

    /** Position in {@link #cBuf} at the time of the last {@link #mark(int)}, or {@link #NO_MARK}. */
    private int cBufMark; // position in cBuf

    /** Encodes chars from {@link #cBuf} into {@link #bBuf}. */
    private final CharsetEncoder charsetEncoder;

    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
        this(cs, bufferSize, newEncoder(charset));
    }

    /**
     * Constructs a new instance that encodes with the given encoder.
     *
     * @param cs the input character sequence.
     * @param bufferSize the requested size of the byte buffer.
     * @param charsetEncoder the encoder to use.
     */
    private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) {
        this.charsetEncoder = charsetEncoder;
        // Ensure that buffer is long enough to hold a complete character
        this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
        // Flip the freshly allocated buffer so that it starts out empty from a reader's point of view.
        this.bBuf.flip();
        this.cBuf = CharBuffer.wrap(cs);
        this.cBufMark = NO_MARK;
        this.bBufMark = NO_MARK;
    }

    /**
     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset) {
        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
    }

    /**
     * Constructs a new instance.
     *
     * @param cs the input character sequence.
     * @param charset the character set name to use, null maps to the default Charset.
     * @param bufferSize the buffer size to use.
     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
     */
    @Deprecated
    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
        this(cs, Charsets.toCharset(charset), bufferSize);
    }

    /**
     * Returns an estimate of the number of bytes remaining in the byte stream.
     * <p>
     * The estimate is the number of already-encoded bytes waiting in the byte buffer plus one byte for each character
     * that has not been encoded yet. Since an encoder does not necessarily produce exactly one byte per character,
     * the real number of remaining bytes may be larger or smaller than the value returned. The result is capped at
     * {@link Integer#MAX_VALUE}.
     * </p>
     *
     * @return an estimate of the count of bytes that can still be read.
     * @throws IOException if an error occurs (probably not possible).
     */
    @Override
    public int available() throws IOException {
        // Counting the unencoded chars as well avoids returning zero merely because the byte buffer is empty
        // (the implementation in 2.4 could return zero even though there were encoded bytes still available).
        // Add in long arithmetic so that a very long character sequence cannot overflow into a negative result.
        final long estimate = (long) this.bBuf.remaining() + this.cBuf.remaining();
        return (int) Math.min(estimate, Integer.MAX_VALUE);
    }

    /**
     * Does nothing; this stream holds no resources that need releasing.
     *
     * @throws IOException never thrown by this implementation.
     */
    @Override
    public void close() throws IOException {
        // noop
    }

    /**
     * Fills the byte output buffer from the input char buffer.
     *
     * @throws CharacterCodingException
     *             an error encoding data.
     */
    private void fillBuffer() throws CharacterCodingException {
        // Switch to "write mode", keeping any bytes that have not been read yet.
        this.bBuf.compact();
        final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
        if (result.isError()) {
            result.throwException();
        }
        // Back to "read mode".
        this.bBuf.flip();
    }

    /**
     * Gets the CharsetEncoder.
     *
     * @return the CharsetEncoder.
     */
    CharsetEncoder getCharsetEncoder() {
        return charsetEncoder;
    }

    /**
     * {@inheritDoc}
     * @param readlimit max read limit (ignored).
     */
    @Override
    public synchronized void mark(final int readlimit) {
        this.cBufMark = this.cBuf.position();
        this.bBufMark = this.bBuf.position();
        this.cBuf.mark();
        this.bBuf.mark();
        // It would be nice to be able to rely on the buffers' own mark & reset alone;
        // however the bBuf is re-used so that won't work: reset() works from the positions saved above.
    }

    /**
     * Tells callers that {@link #mark(int)} and {@link #reset()} are supported.
     *
     * @return always {@code true}.
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Reads the next encoded byte.
     *
     * @return the next byte as a value from 0 to 255, or {@link IOUtils#EOF} once all characters have been encoded
     *         and all encoded bytes have been read.
     * @throws IOException if an encoding error occurs.
     */
    @Override
    public int read() throws IOException {
        for (;;) {
            if (this.bBuf.hasRemaining()) {
                return this.bBuf.get() & 0xFF;
            }
            fillBuffer();
            if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
                return EOF;
            }
        }
    }

    /**
     * Reads encoded bytes into the whole of the given array.
     *
     * @param b the destination array.
     * @return the number of bytes read, or {@link IOUtils#EOF} if the end of the stream has been reached.
     * @throws IOException if an encoding error occurs.
     */
    @Override
    public int read(final byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} encoded bytes into {@code array}, starting at index {@code off}.
     *
     * @param array the destination array.
     * @param off the index in {@code array} at which to store the first byte.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read, 0 if {@code len} is 0, or {@link IOUtils#EOF} if the end of the stream has
     *         been reached.
     * @throws NullPointerException if {@code array} is null.
     * @throws IndexOutOfBoundsException if {@code off} is negative, {@code len} is negative, or {@code len} is
     *         greater than {@code array.length - off}.
     * @throws IOException if an encoding error occurs.
     */
    @Override
    public int read(final byte[] array, int off, int len) throws IOException {
        Objects.requireNonNull(array, "array");
        // Written as "len > array.length - off" rather than "off + len > array.length" so the check cannot overflow;
        // a negative offset is rejected up front instead of only when (and if) bytes get copied.
        if (off < 0 || len < 0 || len > array.length - off) {
            throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            return 0; // must return 0 for zero length read
        }
        if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
            return EOF;
        }
        int bytesRead = 0;
        while (len > 0) {
            if (this.bBuf.hasRemaining()) {
                final int chunk = Math.min(this.bBuf.remaining(), len);
                this.bBuf.get(array, off, chunk);
                off += chunk;
                len -= chunk;
                bytesRead += chunk;
            } else {
                fillBuffer();
                if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
                    break;
                }
            }
        }
        // Nothing copied and nothing left to encode means the end of the stream was reached.
        return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
    }

    /**
     * Repositions this stream to where it was when {@link #mark(int)} was last called, then clears the mark.
     * Does nothing if no mark is set.
     *
     * @throws IOException if re-encoding the data up to the mark fails.
     * @throws IllegalStateException if re-encoding does not stop exactly at the marked character position.
     */
    @Override
    public synchronized void reset() throws IOException {
        //
        // This is not the most efficient implementation, as it re-encodes from the beginning.
        //
        // Since the bBuf is re-used, in general it's necessary to re-encode the data.
        //
        // It should be possible to apply some optimizations however:
        // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
        //   the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
        //   valid otherwise.
        // + Try saving the state of the cBuf before each fillBuffer; it might be possible to
        //   restart from there.
        //
        if (this.cBufMark != NO_MARK) {
            // if cBuf is at 0, we have not started reading anything, so skip re-encoding
            if (this.cBuf.position() != 0) {
                this.charsetEncoder.reset();
                this.cBuf.rewind();
                this.bBuf.rewind();
                this.bBuf.limit(0); // rewind does not clear the buffer
                while (this.cBuf.position() < this.cBufMark) {
                    this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
                    this.bBuf.limit(0);
                    fillBuffer();
                }
            }
            if (this.cBuf.position() != this.cBufMark) {
                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
                        "expected=" + this.cBufMark);
            }
            this.bBuf.position(this.bBufMark);
            this.cBufMark = NO_MARK;
            this.bBufMark = NO_MARK;
        }
    }

    /**
     * Skips over and discards up to {@code n} bytes of encoded data.
     *
     * @param n the number of bytes to skip.
     * @return the number of bytes actually skipped; 0 if {@code n} is not positive.
     * @throws IOException if an encoding error occurs.
     */
    @Override
    public long skip(long n) throws IOException {
        //
        // This could be made more efficient by using position to skip within the current buffer.
        //
        long skipped = 0;
        while (n > 0 && available() > 0) {
            // available() is only an estimate, so the stream can still turn out to be exhausted here;
            // an EOF must not be counted as a skipped byte.
            if (this.read() == EOF) {
                break;
            }
            n--;
            skipped++;
        }
        return skipped;
    }

}