/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.input;

import static org.apache.commons.io.IOUtils.EOF;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import org.apache.commons.io.build.AbstractOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;

/**
 * An {@link InputStream} that utilizes memory mapped files to improve performance. A sliding window of the file is
 * mapped to memory to avoid mapping the entire file to memory at one time. The size of the sliding buffer is
 * configurable.
 * <p>
 * For most operating systems, mapping a file into memory is more expensive than reading or writing a few tens of
 * kilobytes of data. From the standpoint of performance, it is generally only worth mapping relatively large files into
 * memory.
 * </p>
 * <p>
 * To build an instance, see {@link Builder}.
 * </p>
 * <p>
 * Note: Use of this class does not necessarily obviate the need to use a {@link BufferedInputStream}. Depending on the
 * use case, the use of buffering may still further improve performance. For example:
 * </p>
 * <pre>{@code
 * BufferedInputStream s = new BufferedInputStream(new GZIPInputStream(
 *     MemoryMappedFileInputStream.builder()
 *         .setPath(path)
 *         .setBufferSize(256 * 1024)
 *         .get()));}
 * </pre>
 * <p>
 * should outperform:
 * </p>
 * <pre>{@code
 * GZIPInputStream s = new GZIPInputStream(
 *     MemoryMappedFileInputStream.builder()
 *         .setPath(path)
 *         .setBufferSize(256 * 1024)
 *         .get());}
 * </pre>
 *
 * @since 2.12.0
 */
public final class MemoryMappedFileInputStream extends InputStream {

    /**
     * Builds a new {@link MemoryMappedFileInputStream} instance.
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * MemoryMappedFileInputStream s = MemoryMappedFileInputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(256 * 1024)
     *   .get();}
     * </pre>
     *
     * @since 2.12.0
     */
    public static class Builder extends AbstractStreamBuilder<MemoryMappedFileInputStream, Builder> {

        /**
         * Constructs a new builder whose default and initial buffer size are both {@link #DEFAULT_BUFFER_SIZE}.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BUFFER_SIZE);
            setBufferSize(DEFAULT_BUFFER_SIZE);
        }

        /**
         * Constructs a new instance.
         * <p>
         * This builder uses the aspects Path and buffer size.
         * </p>
         * <p>
         * You must provide an origin that can be converted to a Path by this builder, otherwise, this call will throw an
         * {@link UnsupportedOperationException}.
         * </p>
         *
         * @return a new instance.
         * @throws IOException                   if the file cannot be opened for reading.
         * @throws UnsupportedOperationException if the origin cannot provide a Path.
         * @see AbstractOrigin#getPath()
         */
        @Override
        public MemoryMappedFileInputStream get() throws IOException {
            return new MemoryMappedFileInputStream(getPath(), getBufferSize());
        }
    }

    /**
     * Default size of the sliding memory mapped buffer. We use 256K, equal to 64 pages (given a 4K page size).
     * Increasing the value beyond the default size will generally not provide any increase in throughput.
     */
    private static final int DEFAULT_BUFFER_SIZE = 256 * 1024;

    /**
     * Shared zero-length, read-only buffer used before the first window is mapped, at end of file, and after close.
     */
    private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.wrap(new byte[0]).asReadOnlyBuffer();

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /** Maximum number of bytes mapped per window. */
    private final int bufferSize;

    /** Channel the windows are mapped from; opened read-only by the constructor. */
    private final FileChannel channel;

    /** The current window; never {@code null}. */
    private ByteBuffer buffer = EMPTY_BUFFER;

    /** Set once {@link #close()} has been called. */
    private boolean closed;

    /**
     * The starting position (within the file) of the next sliding buffer.
     */
    private long nextBufferPosition;

    /**
     * Constructs a new instance.
     *
     * @param file The path of the file to open.
     * @param bufferSize Size of the sliding buffer.
     * @throws IOException If an I/O error occurs.
     */
    private MemoryMappedFileInputStream(final Path file, final int bufferSize) throws IOException {
        this.bufferSize = bufferSize;
        this.channel = FileChannel.open(file, StandardOpenOption.READ);
    }

    /**
     * Returns the number of bytes left in the currently mapped window; this is 0 before the first read, at a window
     * boundary, and after the stream has been closed.
     *
     * @return the number of bytes remaining in the current window.
     * @throws IOException declared by the overridden method; not thrown by this implementation.
     */
    @Override
    public int available() throws IOException {
        return buffer.remaining();
    }

    /**
     * Hands the current window to {@link ByteBufferCleaner} when cleaning is supported and the buffer is direct.
     */
    private void cleanBuffer() {
        if (ByteBufferCleaner.isSupported() && buffer.isDirect()) {
            ByteBufferCleaner.clean(buffer);
        }
    }

    /**
     * Cleans the current window and closes the underlying channel. Calling this method more than once has no further
     * effect.
     *
     * @throws IOException if closing the channel fails; the stream is still considered closed in that case.
     */
    @Override
    public void close() throws IOException {
        if (!closed) {
            // Flip the flag first so a failing channel.close() cannot leave the stream half-closed.
            closed = true;
            cleanBuffer();
            // Keep the field non-null so available() stays safe to call after close.
            buffer = EMPTY_BUFFER;
            channel.close();
        }
    }

    /**
     * Verifies that this stream has not been closed.
     *
     * @throws IOException if {@link #close()} has already been called.
     */
    private void ensureOpen() throws IOException {
        if (closed) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Gets the configured size of the sliding buffer.
     *
     * @return the size of the sliding buffer, in bytes.
     */
    int getBufferSize() {
        return bufferSize;
    }

    /**
     * Replaces the current window with the next one, mapping at most {@code bufferSize} bytes starting at
     * {@code nextBufferPosition}. When nothing is left in the file the window becomes the empty buffer.
     *
     * @throws IOException if the channel size cannot be read or the region cannot be mapped.
     */
    private void nextBuffer() throws IOException {
        final long remainingInFile = channel.size() - nextBufferPosition;
        // Drop the reference to the old window right after cleaning it, so that a failing map() below can never
        // leave a cleaned buffer reachable through the field.
        cleanBuffer();
        buffer = EMPTY_BUFFER;
        if (remainingInFile > 0) {
            final long amountToMap = Math.min(remainingInFile, bufferSize);
            buffer = channel.map(MapMode.READ_ONLY, nextBufferPosition, amountToMap);
            nextBufferPosition += amountToMap;
        }
    }

    /**
     * Reads the next byte of data.
     *
     * @return the next byte as an {@code int} in the range 0 to 255, or {@code -1} at end of file.
     * @throws IOException if the stream is closed or an I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        ensureOpen();
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        // Byte (not Short) conversion: widening the byte to short first would sign-extend values >= 0x80
        // and yield results outside the 0..255 range required by InputStream.read().
        return Byte.toUnsignedInt(buffer.get());
    }

    /**
     * Reads up to {@code len} bytes into {@code b}, never crossing the boundary of the current window.
     *
     * @param b   the destination array.
     * @param off the offset in {@code b} at which to start storing bytes.
     * @param len the maximum number of bytes to read.
     * @return the number of bytes read, {@code 0} if {@code len} is zero, or {@code -1} at end of file.
     * @throws IOException if the stream is closed or an I/O error occurs.
     */
    @Override
    public int read(final byte[] b, final int off, final int len) throws IOException {
        ensureOpen();
        if (len == 0) {
            // InputStream contract: a zero-length read returns 0, even at end of file.
            return 0;
        }
        if (!buffer.hasRemaining()) {
            nextBuffer();
            if (!buffer.hasRemaining()) {
                return EOF;
            }
        }
        final int numBytes = Math.min(buffer.remaining(), len);
        buffer.get(b, off, numBytes);
        return numBytes;
    }

    /**
     * Skips over up to {@code n} bytes. A skip that fits in the current window only moves the buffer position;
     * otherwise the file position is advanced and the window following the skipped region is mapped.
     *
     * @param n the number of bytes to skip.
     * @return the number of bytes actually skipped; {@code 0} if {@code n} is not positive.
     * @throws IOException if the stream is closed or an I/O error occurs.
     */
    @Override
    public long skip(final long n) throws IOException {
        ensureOpen();
        if (n <= 0) {
            return 0;
        }
        final int remainingInBuffer = buffer.remaining();
        if (n <= remainingInBuffer) {
            buffer.position((int) (buffer.position() + n));
            return n;
        }
        // Clamp at zero: if the file has shrunk below the current position, the skip count must not go negative
        // and the file position must not move backwards.
        final long remainingInFile = Math.max(0L, channel.size() - nextBufferPosition);
        final long skippedInFile = Math.min(remainingInFile, n - remainingInBuffer);
        nextBufferPosition += skippedInFile;
        nextBuffer();
        return remainingInBuffer + skippedInFile;
    }

}