Merged
19 changes: 19 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,25 @@
CHANGELOG
=========

4.1.0
------------------

* Fixed unbounded off-heap memory growth when initializing the reader in
`FileMode.MEMORY`. The previous implementation read the database via
`FileChannel.read()` into a heap buffer, which causes the JDK to cache
temporary direct ByteBuffers in per-thread storage
(`sun.nio.ch.Util.BufferCache`). Repeated initialization across different
threads could grow this cache without bound. The reader now uses
`FileInputStream` for `MEMORY` mode, which bypasses the cache.
`FileMode.MEMORY_MAPPED` was unaffected.
* Fixed a latent short-read bug in the multi-chunk `FileMode.MEMORY`
load path introduced in 4.0.0. `FileChannel.read(ByteBuffer)` is not
contractually obligated to fully fill the destination buffer; a
short read could have caused silent truncation of an in-memory
database. Affects databases larger than ~2GB (the default chunk
size). The new chunked read loop retries until each chunk is fully
populated.

4.0.2 (2025-12-08)
------------------

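For context on the first 4.1.0 bullet above: the sketch below (not part of this PR; the file path, chunk size, and thread count are arbitrary placeholders) makes the per-thread direct-buffer retention visible through the platform BufferPoolMXBean. Each worker thread reads through FileChannel.read() into a heap buffer, which the JDK services by copying through a temporary direct buffer that it then caches in thread-local storage.

import java.lang.management.BufferPoolMXBean;
import java.lang.management.ManagementFactory;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.util.concurrent.CountDownLatch;

public final class DirectBufferCacheDemo {
    public static void main(String[] args) throws Exception {
        Path file = Path.of(args[0]);     // any large file stands in for the .mmdb
        int chunkSize = 64 * 1024 * 1024; // placeholder chunk size
        int threads = 8;
        CountDownLatch allRead = new CountDownLatch(threads);
        CountDownLatch release = new CountDownLatch(1);
        for (int i = 0; i < threads; i++) {
            new Thread(() -> {
                try (FileChannel channel = FileChannel.open(file)) {
                    // Reading into a *heap* buffer makes the JDK copy through a
                    // temporary direct buffer, which it caches per thread.
                    channel.read(ByteBuffer.allocate(chunkSize));
                    allRead.countDown();
                    release.await(); // stay alive so the cached buffer stays reachable
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }).start();
        }
        allRead.await();
        for (BufferPoolMXBean pool :
                ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class)) {
            // Expect the "direct" pool to report roughly threads * chunkSize bytes.
            System.out.printf("%s pool: %d buffers, %d bytes%n",
                pool.getName(), pool.getCount(), pool.getMemoryUsed());
        }
        release.countDown();
    }
}

FileInputStream.read(byte[], int, int), by contrast, copies through a native-side buffer and never touches this cache, which is the property the BufferHolder change below relies on.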
34 changes: 34 additions & 0 deletions mise.lock
@@ -0,0 +1,34 @@
# @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html

[[tools.java]]
version = "26.0.1"
backend = "core:java"

[tools.java."platforms.linux-x64"]
checksum = "sha256:2f2802d57b5fc414f1ddf6648ba12cc9a6454cf67b32ac95407c018f2e6ab0b0"
url = "https://download.java.net/java/GA/jdk26.0.1/458fda22e4c54d5ba572ab8d2b22eb83/8/GPL/openjdk-26.0.1_linux-x64_bin.tar.gz"

[[tools.maven]]
version = "3.9.15"
backend = "aqua:apache/maven"

[tools.maven."platforms.linux-arm64"]
url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz"

[tools.maven."platforms.linux-arm64-musl"]
url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz"

[tools.maven."platforms.linux-x64"]
url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz"

[tools.maven."platforms.linux-x64-musl"]
url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz"

[tools.maven."platforms.macos-arm64"]
url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz"

[tools.maven."platforms.macos-x64"]
url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz"

[tools.maven."platforms.windows-x64"]
url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz"
18 changes: 18 additions & 0 deletions mise.toml
@@ -0,0 +1,18 @@
[settings]
experimental = true
lockfile = true
disable_backends = [
"asdf",
"vfox",
]

[tools]
java = "latest"
maven = "latest"

[hooks]
enter = "mise install --quiet --locked"

[[watch_files]]
patterns = ["mise.toml", "mise.lock"]
run = "mise install --quiet --locked"
98 changes: 52 additions & 46 deletions src/main/java/com/maxmind/db/BufferHolder.java
@@ -3,6 +3,7 @@
import com.maxmind.db.Reader.FileMode;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
@@ -23,53 +24,38 @@ final class BufferHolder {
}

BufferHolder(File database, FileMode mode, int chunkSize) throws IOException {
try (RandomAccessFile file = new RandomAccessFile(database, "r");
FileChannel channel = file.getChannel()) {
long size = channel.size();
if (mode == FileMode.MEMORY) {
if (mode == FileMode.MEMORY) {
// FileInputStream avoids the per-thread direct ByteBuffer cache that
// FileChannel.read() populates when reading into a heap buffer. That cache
// retains the largest direct buffer ever requested — under chunked MEMORY
// mode that would mean chunkSize bytes of off-heap memory held per loader
// thread for the JVM's lifetime.
try (FileInputStream stream = new FileInputStream(database)) {
// Size from the open fd (fstat) so it's atomic with the open;
// getChannel().size() does not populate the buffer cache.
long size = stream.getChannel().size();
var name = database.getName();
if (size <= chunkSize) {
// Allocate, read, and make read-only
ByteBuffer buffer = ByteBuffer.allocate((int) size);
if (channel.read(buffer) != size) {
throw new IOException("Unable to read "
+ database.getName()
+ " into memory. Unexpected end of stream.");
}
buffer.flip();
this.buffer = new SingleBuffer(buffer);
this.buffer = SingleBuffer.wrap(readFully(stream, (int) size, name));
} else {
// Allocate chunks, read, and make read-only
var fullChunks = (int) (size / chunkSize);
var remainder = (int) (size % chunkSize);
var totalChunks = fullChunks + (remainder > 0 ? 1 : 0);
var buffers = new ByteBuffer[totalChunks];
Comment on lines 41 to 44
Severity: medium

The calculation of fullChunks and totalChunks uses an explicit cast from long to int without checking for overflow. If the database size is extremely large relative to the chunkSize (e.g., a multi-terabyte database with a small custom chunkSize), fullChunks could overflow, leading to an incorrectly sized buffers array and potential data corruption or IndexOutOfBoundsException during the reading loop. While MaxMind databases are typically within int range for chunk counts, it is safer to validate this or use Math.toIntExact.

                    var fullChunksLong = size / chunkSize;
                    var remainder = (int) (size % chunkSize);
                    var totalChunksLong = fullChunksLong + (remainder > 0 ? 1 : 0);
                    if (totalChunksLong > Integer.MAX_VALUE) {
                        throw new IOException("Database size exceeds supported chunk count for the given chunkSize");
                    }
                    var totalChunks = (int) totalChunksLong;
                    var fullChunks = (int) fullChunksLong;
                    var buffers = new ByteBuffer[totalChunks];

Member Author

(Claude replying on Greg's behalf.)

Thanks for flagging; leaving the cast unchanged in this PR for a few reasons:

1. Pre-existing, not introduced by this PR. The same (int) (size / chunkSize) pattern (and the parallel (int) (size % chunkSize)) was in the prior multi-chunk MEMORY path before the refactor, so this isn't a regression we're shipping.
2. Not practically reachable. A real MaxMind DB at multi-TB sizes doesn't exist; well before this cast could overflow we'd hit the byte[] size limit (Integer.MAX_VALUE), ByteBuffer.allocate(int)'s int cap, and JVM heap limits.
3. Project guidance (CLAUDE.md) says not to add validation for scenarios that can't happen; at this layer, chunkSize is set by the library, not external input.

If/when very-large-DB support beyond ~2GB chunks becomes a goal, the right move is probably a broader audit of the int casts and array indexing throughout BufferHolder / MultiBuffer, not just Math.toIntExact in this one spot. Out of scope for the current PR.


for (int i = 0; i < fullChunks; i++) {
buffers[i] = ByteBuffer.allocate(chunkSize);
buffers[i] = ByteBuffer.wrap(readFully(stream, chunkSize, name));
}
if (remainder > 0) {
buffers[totalChunks - 1] = ByteBuffer.allocate(remainder);
buffers[totalChunks - 1] = ByteBuffer.wrap(
readFully(stream, remainder, name));
}

var totalRead = 0L;
for (var buffer : buffers) {
var read = channel.read(buffer);
Contributor

Interesting:

  1. Pre-existing short-read bug, now fixed (worth calling out). The old multi-chunk MEMORY path had a latent bug:

  for (var buffer : buffers) {
      var read = channel.read(buffer);   // may be a *short* read
      if (read == -1) { break; }
      totalRead += read;
      buffer.flip();                     // flips at the short position
  }

FileChannel.read(ByteBuffer) is not contractually obligated to fully fill the buffer; a short read would leave the chunk partially populated, and flip() would expose only the bytes that were actually read, silently truncating the in-memory database. The new readFully loop handles this correctly. Good catch (even if accidental).

Not sure if we'd want to mention in the changelog.

Member Author

(Claude replying on Greg's behalf.)

Done in 232dd73. Traced the introducing commit via git log -S: the buggy for (var buffer : buffers) { channel.read(buffer); buffer.flip(); } loop was added in 2a1fa7f "Make buffers read-only for all paths" and first shipped in v4.0.0, so 4.0.0/4.0.1/4.0.2 are all affected.

Added a second bullet under the 4.1.0 section noting:

  • The silent-truncation risk specifically for the multi-chunk path.
  • That it only affects databases larger than ~2GB (the default chunk size).
  • That the new chunked read loop retries until each chunk is fully populated.

The single-chunk MEMORY path has the same FileChannel.read(ByteBuffer) contract concern but at least throws when channel.read(buffer) != size, so only the multi-chunk path silently truncates — that's the one worth calling out.

if (read == -1) {
break;
}
totalRead += read;
buffer.flip();
}

if (totalRead != size) {
throw new IOException("Unable to read "
+ database.getName()
+ " into memory. Unexpected end of stream.");
}

this.buffer = new MultiBuffer(buffers, chunkSize);
}
} else {
}
} else {
try (RandomAccessFile file = new RandomAccessFile(database, "r");
FileChannel channel = file.getChannel()) {
long size = channel.size();
if (size <= chunkSize) {
this.buffer = SingleBuffer.mapFromChannel(channel);
} else {
@@ -79,11 +65,32 @@ final class BufferHolder {
}
}

BufferHolder(InputStream stream, int chunkSize) throws IOException {
BufferHolder(InputStream stream, int chunkSize) throws IOException {
if (null == stream) {
throw new NullPointerException("Unable to use a NULL InputStream");
}
this.buffer = readFromStream(stream, chunkSize);
}

// Pre-allocates exactly len bytes. Used by file-backed MEMORY mode where the size is
// known up front, avoiding the transient peak from ByteArrayOutputStream.grow() and
// the defensive copy in toByteArray().
private static byte[] readFully(InputStream stream, int len, String name) throws IOException {
var data = new byte[len];
var totalRead = 0;
while (totalRead < len) {
var n = stream.read(data, totalRead, len - totalRead);
if (n < 0) {
throw new IOException("Unable to read "
+ name
+ " into memory. Unexpected end of stream.");
}
totalRead += n;
}
return data;
}

private static Buffer readFromStream(InputStream stream, int chunkSize) throws IOException {
// Read data from the stream in chunks to support databases >2GB.
// Invariant: All chunks except the last are exactly chunkSize bytes.
var chunks = new ArrayList<byte[]>();
@@ -116,17 +123,16 @@ final class BufferHolder {

if (chunks.size() == 1) {
// For databases that fit in a single chunk, use SingleBuffer
this.buffer = SingleBuffer.wrap(chunks.get(0));
} else {
// For large databases, wrap chunks in ByteBuffers and use MultiBuffer
// Guaranteed: chunks[0..n-2] all have length == chunkSize
// chunks[n-1] may have length < chunkSize
var buffers = new ByteBuffer[chunks.size()];
for (var i = 0; i < chunks.size(); i++) {
buffers[i] = ByteBuffer.wrap(chunks.get(i));
}
this.buffer = new MultiBuffer(buffers, chunkSize);
return SingleBuffer.wrap(chunks.get(0));
}
// For large databases, wrap chunks in ByteBuffers and use MultiBuffer
// Guaranteed: chunks[0..n-2] all have length == chunkSize
// chunks[n-1] may have length < chunkSize
var buffers = new ByteBuffer[chunks.size()];
for (var i = 0; i < chunks.size(); i++) {
buffers[i] = ByteBuffer.wrap(chunks.get(i));
}
return new MultiBuffer(buffers, chunkSize);
}

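A side note on the readFully helper above: since Java 9 the standard library offers InputStream.readNBytes(byte[], int, int), which already loops until the requested count is read or EOF is reached, so only the EOF-short case needs explicit handling. A hypothetical drop-in equivalent (readFullyAlt is an illustrative name, not code from this PR):

private static byte[] readFullyAlt(InputStream stream, int len, String name)
        throws IOException {
    var data = new byte[len];
    // readNBytes retries internally until len bytes are read or EOF is hit,
    // so a short return can only mean the stream ended early.
    if (stream.readNBytes(data, 0, len) != len) {
        throw new IOException("Unable to read "
            + name
            + " into memory. Unexpected end of stream.");
    }
    return data;
}

Either form preserves the key property the changelog entry describes: a chunk is never handed to ByteBuffer.wrap() partially populated.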
6 changes: 5 additions & 1 deletion src/main/java/com/maxmind/db/Reader.java
@@ -59,8 +59,12 @@ public Reader(File database) throws IOException {
}

Reader(File database, int chunkSize) throws IOException {
this(database, FileMode.MEMORY_MAPPED, chunkSize);
}

Reader(File database, FileMode fileMode, int chunkSize) throws IOException {
this(
new BufferHolder(database, FileMode.MEMORY_MAPPED, chunkSize),
new BufferHolder(database, fileMode, chunkSize),
database.getName(),
NoCache.getInstance()
);
19 changes: 19 additions & 0 deletions src/test/java/com/maxmind/db/ReaderTest.java
@@ -12,6 +12,7 @@
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import com.maxmind.db.Reader.FileMode;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -83,6 +84,24 @@ public void test(int chunkSize) throws IOException {
}
}

@ParameterizedTest
@MethodSource("chunkSizes")
public void testMemoryMode(int chunkSize) throws IOException {
for (long recordSize : new long[] {24, 28, 32}) {
for (int ipVersion : new int[] {4, 6}) {
var file = getFile("MaxMind-DB-test-ipv" + ipVersion + "-" + recordSize + ".mmdb");
try (var reader = new Reader(file, FileMode.MEMORY, chunkSize)) {
this.testMetadata(reader, ipVersion, recordSize);
if (ipVersion == 4) {
this.testIpV4(reader, file);
} else {
this.testIpV6(reader, file);
}
}
}
}
}

static class GetRecordTest {
InetAddress ip;
File db;