001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.input;
018
019import static org.apache.commons.io.IOUtils.EOF;
020
021import java.io.BufferedInputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.nio.ByteBuffer;
025import java.nio.channels.FileChannel;
026import java.nio.channels.FileChannel.MapMode;
027import java.nio.file.Path;
028import java.nio.file.StandardOpenOption;
029
030import org.apache.commons.io.build.AbstractOrigin;
031import org.apache.commons.io.build.AbstractStreamBuilder;
032
033/**
034 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
035 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
036 * configurable.
037 * <p>
038 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
039 * kilobytes of data. From the standpoint of performance. it is generally only worth mapping relatively large files into
040 * memory.
041 * </p>
042 * <p>
043 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
044 * use case, the use of buffering may still further improve performance. For example:
045 * </p>
046 * <p>
047 * To build an instance, see {@link Builder}.
048 * </p>
049 * <pre>{@code
050 * BufferedInputStream s = new BufferedInputStream(new GzipInputStream(
051 *   MemoryMappedFileInputStream.builder()
052 *     .setPath(path)
053 *     .setBufferSize(256 * 1024)
054 *     .get()));}
055 * </pre>
056 * <p>
057 * should outperform:
058 * </p>
059 * <pre>
060 * new GzipInputStream(new MemoryMappedFileInputStream(path))
061 * </pre>
062 * <pre>{@code
063 * GzipInputStream s = new GzipInputStream(
064 *   MemoryMappedFileInputStream.builder()
065 *     .setPath(path)
066 *     .setBufferSize(256 * 1024)
067 *     .get());}
068 * </pre>
069 *
070 * @since 2.12.0
071 */
072public final class MemoryMappedFileInputStream extends InputStream {
073
074    /**
075     * Builds a new {@link MemoryMappedFileInputStream} instance.
076     * <p>
077     * For example:
078     * </p>
079     * <pre>{@code
080     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
081     *   .setPath(path)
082     *   .setBufferSize(256 * 1024)
083     *   .get();}
084     * </pre>
085     *
086     * @since 2.12.0
087     */
088    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {
089
090        public Builder() {
091            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
092            setBufferSize(DEFAULT_BUFFER_SIZE);
093        }
094
095        /**
096         * Constructs a new instance.
097         * <p>
098         * This builder use the aspects Path and buffer size.
099         * </p>
100         * <p>
101         * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an
102         * {@link UnsupportedOperationException}.
103         * </p>
104         *
105         * @return a new instance.
106         * @throws UnsupportedOperationException if the origin cannot provide a Path.
107         * @see AbstractOrigin#getPath()
108         */
109        @Override
110        public MemoryMappedFileInputStream get() throws IOException {
111            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
112        }
113    }
114
115    /**
116     * Default size of the sliding memory mapped buffer. We use 256K, equal to 65536 pages (given a 4K page size).
117     * Increasing the value beyond the default size will generally not provide any increase in throughput.
118     */
119    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;
120
121    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();
122
123    /**
124     * Constructs a new {@link Builder}.
125     *
126     * @return a new {@link Builder}.
127     * @since 2.12.0
128     */
129    public static Builder builder() {
130        return new Builder();
131    }
132
133    private final int bufferSize;
134    private final FileChannel channel;
135    private ByteBuffer buffer = EMPTY_BUFFER;
136    private boolean closed;
137
138    /**
139     * The starting position (within the file) of the next sliding buffer.
140     */
141    private long nextBufferPosition;
142
143    /**
144     * Constructs a new instance.
145     *
146     * @param file The path of the file to open.
147     * @param bufferSize Size of the sliding buffer.
148     * @throws IOException If an I/O error occurs.
149     */
150    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
151        this.bufferSize = bufferSize;
152        this.channel = FileChannel.open(file, StandardOpenOption.READ);
153    }
154
155    @Override
156    public int available() throws IOException {
157        return buffer.remaining();
158    }
159
160    private void cleanBuffer() {
161        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
162            ByteBufferCleaner.clean(buffer);
163        }
164    }
165
166    @Override
167    public void close() throws IOException {
168        if (!closed) {
169            cleanBuffer();
170            buffer = null;
171            channel.close();
172            closed = true;
173        }
174    }
175
176    private void ensureOpen() throws IOException {
177        if (closed) {
178            throw new IOException("Stream closed");
179        }
180    }
181
182    int getBufferSize() {
183        return bufferSize;
184    }
185
186    private void nextBuffer() throws IOException {
187        final long remainingInFile = channel.size() - nextBufferPosition;
188        if (remainingInFile > 0) {
189            final long amountToMap = Math.min(remainingInFile, bufferSize);
190            cleanBuffer();
191            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
192            nextBufferPosition += amountToMap;
193        } else {
194            buffer = EMPTY_BUFFER;
195        }
196    }
197
198    @Override
199    public int read() throws IOException {
200        ensureOpen();
201        if (!buffer.hasRemaining()) {
202            nextBuffer();
203            if (!buffer.hasRemaining()) {
204                return EOF;
205            }
206        }
207        return Short.toUnsignedInt(buffer.get());
208    }
209
210    @Override
211    public int read(final byte[] b, final int off, final int len) throws IOException {
212        ensureOpen();
213        if (!buffer.hasRemaining()) {
214            nextBuffer();
215            if (!buffer.hasRemaining()) {
216                return EOF;
217            }
218        }
219        final int numBytes = Math.min(buffer.remaining(), len);
220        buffer.get(b, off, numBytes);
221        return numBytes;
222    }
223
224    @Override
225    public long skip(final long n) throws IOException {
226        ensureOpen();
227        if (n <= 0) {
228            return 0;
229        }
230        if (n <= buffer.remaining()) {
231            buffer.position((int) (buffer.position() + n));
232            return n;
233        }
234        final long remainingInFile = channel.size() - nextBufferPosition;
235        final long skipped = buffer.remaining() + Math.min(remainingInFile, n - buffer.remaining());
236        nextBufferPosition += skipped - buffer.remaining();
237        nextBuffer();
238        return skipped;
239    }
240
241}