-
Notifications
You must be signed in to change notification settings - Fork 50
STF-383: Fix off-heap memory growth in FileMode.MEMORY #368
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4ab1768
e2be3a5
cbf83d9
2624387
232dd73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| # @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html | ||
|
|
||
| [[tools.java]] | ||
| version = "26.0.1" | ||
| backend = "core:java" | ||
|
|
||
| [tools.java."platforms.linux-x64"] | ||
| checksum = "sha256:2f2802d57b5fc414f1ddf6648ba12cc9a6454cf67b32ac95407c018f2e6ab0b0" | ||
| url = "https://download.java.net/java/GA/jdk26.0.1/458fda22e4c54d5ba572ab8d2b22eb83/8/GPL/openjdk-26.0.1_linux-x64_bin.tar.gz" | ||
|
|
||
| [[tools.maven]] | ||
| version = "3.9.15" | ||
| backend = "aqua:apache/maven" | ||
|
|
||
| [tools.maven."platforms.linux-arm64"] | ||
| url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" | ||
|
|
||
| [tools.maven."platforms.linux-arm64-musl"] | ||
| url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" | ||
|
|
||
| [tools.maven."platforms.linux-x64"] | ||
| url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" | ||
|
|
||
| [tools.maven."platforms.linux-x64-musl"] | ||
| url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" | ||
|
|
||
| [tools.maven."platforms.macos-arm64"] | ||
| url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" | ||
|
|
||
| [tools.maven."platforms.macos-x64"] | ||
| url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" | ||
|
|
||
| [tools.maven."platforms.windows-x64"] | ||
| url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| [settings] | ||
| experimental = true | ||
| lockfile = true | ||
| disable_backends = [ | ||
| "asdf", | ||
| "vfox", | ||
| ] | ||
|
|
||
| [tools] | ||
| java = "latest" | ||
| maven = "latest" | ||
|
|
||
| [hooks] | ||
| enter = "mise install --quiet --locked" | ||
|
|
||
| [[watch_files]] | ||
| patterns = ["mise.toml", "mise.lock"] | ||
| run = "mise install --quiet --locked" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
| import com.maxmind.db.Reader.FileMode; | ||
| import java.io.ByteArrayOutputStream; | ||
| import java.io.File; | ||
| import java.io.FileInputStream; | ||
| import java.io.IOException; | ||
| import java.io.InputStream; | ||
| import java.io.RandomAccessFile; | ||
|
|
@@ -23,53 +24,38 @@ final class BufferHolder { | |
| } | ||
|
|
||
| BufferHolder(File database, FileMode mode, int chunkSize) throws IOException { | ||
| try (RandomAccessFile file = new RandomAccessFile(database, "r"); | ||
| FileChannel channel = file.getChannel()) { | ||
| long size = channel.size(); | ||
| if (mode == FileMode.MEMORY) { | ||
| if (mode == FileMode.MEMORY) { | ||
| // FileInputStream avoids the per-thread direct ByteBuffer cache that | ||
| // FileChannel.read() populates when reading into a heap buffer. That cache | ||
| // retains the largest direct buffer ever requested — under chunked MEMORY | ||
| // mode that would mean chunkSize bytes of off-heap memory held per loader | ||
| // thread for the JVM's lifetime. | ||
| try (FileInputStream stream = new FileInputStream(database)) { | ||
| // Size from the open fd (fstat) so it's atomic with the open; | ||
| // getChannel().size() does not populate the buffer cache. | ||
| long size = stream.getChannel().size(); | ||
| var name = database.getName(); | ||
| if (size <= chunkSize) { | ||
| // Allocate, read, and make read-only | ||
| ByteBuffer buffer = ByteBuffer.allocate((int) size); | ||
| if (channel.read(buffer) != size) { | ||
| throw new IOException("Unable to read " | ||
| + database.getName() | ||
| + " into memory. Unexpected end of stream."); | ||
| } | ||
| buffer.flip(); | ||
| this.buffer = new SingleBuffer(buffer); | ||
| this.buffer = SingleBuffer.wrap(readFully(stream, (int) size, name)); | ||
| } else { | ||
| // Allocate chunks, read, and make read-only | ||
| var fullChunks = (int) (size / chunkSize); | ||
| var remainder = (int) (size % chunkSize); | ||
| var totalChunks = fullChunks + (remainder > 0 ? 1 : 0); | ||
| var buffers = new ByteBuffer[totalChunks]; | ||
|
|
||
| for (int i = 0; i < fullChunks; i++) { | ||
| buffers[i] = ByteBuffer.allocate(chunkSize); | ||
| buffers[i] = ByteBuffer.wrap(readFully(stream, chunkSize, name)); | ||
| } | ||
| if (remainder > 0) { | ||
| buffers[totalChunks - 1] = ByteBuffer.allocate(remainder); | ||
| buffers[totalChunks - 1] = ByteBuffer.wrap( | ||
| readFully(stream, remainder, name)); | ||
| } | ||
|
|
||
| var totalRead = 0L; | ||
| for (var buffer : buffers) { | ||
| var read = channel.read(buffer); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Interesting: Not sure if we'd want to mention this in the changelog.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Claude replying on Greg's behalf.) Done in 232dd73. Traced the introducing commit (link not preserved in this view), and added a second bullet under the 4.1.0 section noting:
The single-chunk MEMORY path has the same |
||
| if (read == -1) { | ||
| break; | ||
| } | ||
| totalRead += read; | ||
| buffer.flip(); | ||
| } | ||
|
|
||
| if (totalRead != size) { | ||
| throw new IOException("Unable to read " | ||
| + database.getName() | ||
| + " into memory. Unexpected end of stream."); | ||
| } | ||
|
|
||
| this.buffer = new MultiBuffer(buffers, chunkSize); | ||
| } | ||
| } else { | ||
| } | ||
| } else { | ||
| try (RandomAccessFile file = new RandomAccessFile(database, "r"); | ||
| FileChannel channel = file.getChannel()) { | ||
| long size = channel.size(); | ||
| if (size <= chunkSize) { | ||
| this.buffer = SingleBuffer.mapFromChannel(channel); | ||
| } else { | ||
|
|
@@ -79,11 +65,32 @@ final class BufferHolder { | |
| } | ||
| } | ||
|
|
||
| BufferHolder(InputStream stream, int chunkSize) throws IOException { | ||
| BufferHolder(InputStream stream, int chunkSize) throws IOException { | ||
| if (null == stream) { | ||
| throw new NullPointerException("Unable to use a NULL InputStream"); | ||
| } | ||
| this.buffer = readFromStream(stream, chunkSize); | ||
| } | ||
|
|
||
| // Pre-allocates exactly len bytes. Used by file-backed MEMORY mode where the size is | ||
| // known up front, avoiding the transient peak from ByteArrayOutputStream.grow() and | ||
| // the defensive copy in toByteArray(). | ||
| private static byte[] readFully(InputStream stream, int len, String name) throws IOException { | ||
| var data = new byte[len]; | ||
| var totalRead = 0; | ||
| while (totalRead < len) { | ||
| var n = stream.read(data, totalRead, len - totalRead); | ||
| if (n < 0) { | ||
| throw new IOException("Unable to read " | ||
| + name | ||
| + " into memory. Unexpected end of stream."); | ||
| } | ||
| totalRead += n; | ||
| } | ||
| return data; | ||
| } | ||
|
|
||
| private static Buffer readFromStream(InputStream stream, int chunkSize) throws IOException { | ||
| // Read data from the stream in chunks to support databases >2GB. | ||
| // Invariant: All chunks except the last are exactly chunkSize bytes. | ||
| var chunks = new ArrayList<byte[]>(); | ||
|
|
@@ -116,17 +123,16 @@ final class BufferHolder { | |
|
|
||
| if (chunks.size() == 1) { | ||
| // For databases that fit in a single chunk, use SingleBuffer | ||
| this.buffer = SingleBuffer.wrap(chunks.get(0)); | ||
| } else { | ||
| // For large databases, wrap chunks in ByteBuffers and use MultiBuffer | ||
| // Guaranteed: chunks[0..n-2] all have length == chunkSize | ||
| // chunks[n-1] may have length < chunkSize | ||
| var buffers = new ByteBuffer[chunks.size()]; | ||
| for (var i = 0; i < chunks.size(); i++) { | ||
| buffers[i] = ByteBuffer.wrap(chunks.get(i)); | ||
| } | ||
| this.buffer = new MultiBuffer(buffers, chunkSize); | ||
| return SingleBuffer.wrap(chunks.get(0)); | ||
| } | ||
| // For large databases, wrap chunks in ByteBuffers and use MultiBuffer | ||
| // Guaranteed: chunks[0..n-2] all have length == chunkSize | ||
| // chunks[n-1] may have length < chunkSize | ||
| var buffers = new ByteBuffer[chunks.size()]; | ||
| for (var i = 0; i < chunks.size(); i++) { | ||
| buffers[i] = ByteBuffer.wrap(chunks.get(i)); | ||
| } | ||
| return new MultiBuffer(buffers, chunkSize); | ||
| } | ||
|
|
||
| /* | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The calculation of
`fullChunks` and `totalChunks` uses an explicit cast from `long` to `int` without checking for overflow. If the database size is extremely large relative to the `chunkSize` (e.g., a multi-terabyte database with a small custom `chunkSize`), `fullChunks` could overflow, leading to an incorrectly sized `buffers` array and potential data corruption or `IndexOutOfBoundsException` during the reading loop. While MaxMind databases are typically within `int` range for chunk counts, it is safer to validate this or use `Math.toIntExact`. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(Claude replying on Greg's behalf.)
Thanks for flagging — leaving the cast unchanged in this PR for a few reasons:
- The `(int) (size / chunkSize)` pattern (and the parallel `(int) (size % chunkSize)`) was in the prior multi-chunk MEMORY path before the refactor, so this isn't a regression we're shipping.
- Practical sizes are already capped by `byte[]` size limits (`Integer.MAX_VALUE`), `ByteBuffer.allocate(int)`'s `int` cap, and JVM heap limits.
- `chunkSize` is set by the library, not external input.

If/when very-large-DB support beyond ~2GB chunks becomes a goal, the right move is probably a broader audit of the `int` casts and array indexing throughout `BufferHolder`/`MultiBuffer`, not just `Math.toIntExact` in this one spot. Out of scope for the current PR.