001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.io.input;
019
020import static org.apache.commons.io.IOUtils.EOF;
021
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.CharBuffer;
026import java.nio.charset.CharacterCodingException;
027import java.nio.charset.Charset;
028import java.nio.charset.CharsetEncoder;
029import java.nio.charset.CoderResult;
030import java.nio.charset.CodingErrorAction;
031import java.util.Objects;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.charset.CharsetEncoders;
037import org.apache.commons.io.function.Uncheck;
038
039/**
040 * Implements an {@link InputStream} to read from String, StringBuffer, StringBuilder or CharBuffer.
041 * <p>
042 * <strong>Note:</strong> Supports {@link #mark(int)} and {@link #reset()}.
043 * </p>
044 *
045 * @since 2.2
046 */
047public class CharSequenceInputStream extends InputStream {
048
049    /**
050     * Builds a new {@link CharSequenceInputStream} instance.
051     * <p>
052     * For example:
053     * </p>
054     * <h2>Using a Charset</h2>
055     * <pre>{@code
056     * CharSequenceInputStream s = CharSequenceInputStream.builder()
057     *   .setBufferSize(8192)
058     *   .setCharSequence("String")
059     *   .setCharset(Charset.defaultCharset())
060     *   .get();}
061     * </pre>
062     * <h2>Using a CharsetEncoder</h2>
063     * <pre>{@code
064     * CharSequenceInputStream s = CharSequenceInputStream.builder()
065     *   .setBufferSize(8192)
066     *   .setCharSequence("String")
067     *   .setCharsetEncoder(Charset.defaultCharset().newEncoder()
068     *     .onMalformedInput(CodingErrorAction.REPLACE)
069     *     .onUnmappableCharacter(CodingErrorAction.REPLACE))
070     *   .get();}
071     * </pre>
072     *
073     * @since 2.13.0
074     */
075    public static class Builder extends AbstractStreamBuilder<CharSequenceInputStream, Builder> {
076
077        private CharsetEncoder charsetEncoder = newEncoder(getCharset());
078
079        /**
080         * Constructs a new instance.
081         * <p>
082         * This builder use the aspects the CharSequence, buffer size, and Charset.
083         * </p>
084         *
085         * @return a new instance.
086         * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
087         */
088        @Override
089        public CharSequenceInputStream get() {
090            return Uncheck.get(() -> new CharSequenceInputStream(getCharSequence(), getBufferSize(), charsetEncoder));
091        }
092
093        CharsetEncoder getCharsetEncoder() {
094            return charsetEncoder;
095        }
096
097        @Override
098        public Builder setCharset(final Charset charset) {
099            super.setCharset(charset);
100            charsetEncoder = newEncoder(getCharset());
101            return this;
102        }
103
104        /**
105         * Sets the charset encoder. Assumes that the caller has configured the encoder.
106         *
107         * @param newEncoder the charset encoder.
108         * @return this
109         * @since 2.13.0
110         */
111        public Builder setCharsetEncoder(final CharsetEncoder newEncoder) {
112            charsetEncoder = CharsetEncoders.toCharsetEncoder(newEncoder, () -> newEncoder(getCharsetDefault()));
113            super.setCharset(charsetEncoder.charset());
114            return this;
115        }
116
117    }
118
119    private static final int NO_MARK = -1;
120
121    /**
122     * Constructs a new {@link Builder}.
123     *
124     * @return a new {@link Builder}.
125     * @since 2.12.0
126     */
127    public static Builder builder() {
128        return new Builder();
129    }
130
131    private static CharsetEncoder newEncoder(final Charset charset) {
132        // @formatter:off
133        return Charsets.toCharset(charset).newEncoder()
134                .onMalformedInput(CodingErrorAction.REPLACE)
135                .onUnmappableCharacter(CodingErrorAction.REPLACE);
136        // @formatter:on
137    }
138
139    private final ByteBuffer bBuf;
140    private int bBufMark; // position in bBuf
141    private final CharBuffer cBuf;
142    private int cBufMark; // position in cBuf
143    private final CharsetEncoder charsetEncoder;
144
145    /**
146     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
147     *
148     * @param cs the input character sequence.
149     * @param charset the character set name to use.
150     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
151     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
152     */
153    @Deprecated
154    public CharSequenceInputStream(final CharSequence cs, final Charset charset) {
155        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
156    }
157
158    /**
159     * Constructs a new instance.
160     *
161     * @param cs the input character sequence.
162     * @param charset the character set name to use, null maps to the default Charset.
163     * @param bufferSize the buffer size to use.
164     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
165     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
166     */
167    @Deprecated
168    public CharSequenceInputStream(final CharSequence cs, final Charset charset, final int bufferSize) {
169        // @formatter:off
170        this(cs, bufferSize, newEncoder(charset));
171        // @formatter:on
172    }
173
174    private CharSequenceInputStream(final CharSequence cs, final int bufferSize, final CharsetEncoder charsetEncoder) {
175        this.charsetEncoder = charsetEncoder;
176        // Ensure that buffer is long enough to hold a complete character
177        this.bBuf = ByteBuffer.allocate(ReaderInputStream.checkMinBufferSize(charsetEncoder, bufferSize));
178        this.bBuf.flip();
179        this.cBuf = CharBuffer.wrap(cs);
180        this.cBufMark = NO_MARK;
181        this.bBufMark = NO_MARK;
182    }
183
184    /**
185     * Constructs a new instance with a buffer size of {@link IOUtils#DEFAULT_BUFFER_SIZE}.
186     *
187     * @param cs the input character sequence.
188     * @param charset the character set name to use.
189     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
190     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
191     */
192    @Deprecated
193    public CharSequenceInputStream(final CharSequence cs, final String charset) {
194        this(cs, charset, IOUtils.DEFAULT_BUFFER_SIZE);
195    }
196
197    /**
198     * Constructs a new instance.
199     *
200     * @param cs the input character sequence.
201     * @param charset the character set name to use, null maps to the default Charset.
202     * @param bufferSize the buffer size to use.
203     * @throws IllegalArgumentException if the buffer is not large enough to hold a complete character.
204     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}
205     */
206    @Deprecated
207    public CharSequenceInputStream(final CharSequence cs, final String charset, final int bufferSize) {
208        this(cs, Charsets.toCharset(charset), bufferSize);
209    }
210
211    /**
212     * Return an estimate of the number of bytes remaining in the byte stream.
213     * @return the count of bytes that can be read without blocking (or returning EOF).
214     *
215     * @throws IOException if an error occurs (probably not possible).
216     */
217    @Override
218    public int available() throws IOException {
219        // The cached entries are in bBuf; since encoding always creates at least one byte
220        // per character, we can add the two to get a better estimate (e.g. if bBuf is empty)
221        // Note that the implementation in 2.4 could return zero even though there were
222        // encoded bytes still available.
223        return this.bBuf.remaining() + this.cBuf.remaining();
224    }
225
226    @Override
227    public void close() throws IOException {
228        // noop
229    }
230
231    /**
232     * Fills the byte output buffer from the input char buffer.
233     *
234     * @throws CharacterCodingException
235     *             an error encoding data.
236     */
237    private void fillBuffer() throws CharacterCodingException {
238        this.bBuf.compact();
239        final CoderResult result = this.charsetEncoder.encode(this.cBuf, this.bBuf, true);
240        if (result.isError()) {
241            result.throwException();
242        }
243        this.bBuf.flip();
244    }
245
246    /**
247     * Gets the CharsetEncoder.
248     *
249     * @return the CharsetEncoder.
250     */
251    CharsetEncoder getCharsetEncoder() {
252        return charsetEncoder;
253    }
254
255    /**
256     * {@inheritDoc}
257     * @param readlimit max read limit (ignored).
258     */
259    @Override
260    public synchronized void mark(final int readlimit) {
261        this.cBufMark = this.cBuf.position();
262        this.bBufMark = this.bBuf.position();
263        this.cBuf.mark();
264        this.bBuf.mark();
265        // It would be nice to be able to use mark & reset on the cBuf and bBuf;
266        // however the bBuf is re-used so that won't work
267    }
268
269    @Override
270    public boolean markSupported() {
271        return true;
272    }
273
274    @Override
275    public int read() throws IOException {
276        for (;;) {
277            if (this.bBuf.hasRemaining()) {
278                return this.bBuf.get() & 0xFF;
279            }
280            fillBuffer();
281            if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
282                return EOF;
283            }
284        }
285    }
286
287    @Override
288    public int read(final byte[] b) throws IOException {
289        return read(b, 0, b.length);
290    }
291
292    @Override
293    public int read(final byte[] array, int off, int len) throws IOException {
294        Objects.requireNonNull(array, "array");
295        if (len < 0 || off + len > array.length) {
296            throw new IndexOutOfBoundsException("Array Size=" + array.length + ", offset=" + off + ", length=" + len);
297        }
298        if (len == 0) {
299            return 0; // must return 0 for zero length read
300        }
301        if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
302            return EOF;
303        }
304        int bytesRead = 0;
305        while (len > 0) {
306            if (this.bBuf.hasRemaining()) {
307                final int chunk = Math.min(this.bBuf.remaining(), len);
308                this.bBuf.get(array, off, chunk);
309                off += chunk;
310                len -= chunk;
311                bytesRead += chunk;
312            } else {
313                fillBuffer();
314                if (!this.bBuf.hasRemaining() && !this.cBuf.hasRemaining()) {
315                    break;
316                }
317            }
318        }
319        return bytesRead == 0 && !this.cBuf.hasRemaining() ? EOF : bytesRead;
320    }
321
322    @Override
323    public synchronized void reset() throws IOException {
324        //
325        // This is not the most efficient implementation, as it re-encodes from the beginning.
326        //
327        // Since the bBuf is re-used, in general it's necessary to re-encode the data.
328        //
329        // It should be possible to apply some optimizations however:
330        // + use mark/reset on the cBuf and bBuf. This would only work if the buffer had not been (re)filled since
331        // the mark. The code would have to catch InvalidMarkException - does not seem possible to check if mark is
332        // valid otherwise. + Try saving the state of the cBuf before each fillBuffer; it might be possible to
333        // restart from there.
334        //
335        if (this.cBufMark != NO_MARK) {
336            // if cBuf is at 0, we have not started reading anything, so skip re-encoding
337            if (this.cBuf.position() != 0) {
338                this.charsetEncoder.reset();
339                this.cBuf.rewind();
340                this.bBuf.rewind();
341                this.bBuf.limit(0); // rewind does not clear the buffer
342                while (this.cBuf.position() < this.cBufMark) {
343                    this.bBuf.rewind(); // empty the buffer (we only refill when empty during normal processing)
344                    this.bBuf.limit(0);
345                    fillBuffer();
346                }
347            }
348            if (this.cBuf.position() != this.cBufMark) {
349                throw new IllegalStateException("Unexpected CharBuffer position: actual=" + cBuf.position() + " " +
350                        "expected=" + this.cBufMark);
351            }
352            this.bBuf.position(this.bBufMark);
353            this.cBufMark = NO_MARK;
354            this.bBufMark = NO_MARK;
355        }
356    }
357
358    @Override
359    public long skip(long n) throws IOException {
360        //
361        // This could be made more efficient by using position to skip within the current buffer.
362        //
363        long skipped = 0;
364        while (n > 0 && available() > 0) {
365            this.read();
366            n--;
367            skipped++;
368        }
369        return skipped;
370    }
371
372}