Skip to content
This repository was archived by the owner on Feb 18, 2026. It is now read-only.

Commit 02ca495

Browse files
committed
Add pre-baked Cassandra Docker image for faster test startup
1 parent 8e222db commit 02ca495

6 files changed

Lines changed: 143 additions & 52 deletions

File tree

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ repos:
4040
exclude: |
4141
(?x)^(
4242
tests/cassandra/insert.py|
43+
tests/cassandra/init-schemas.py|
4344
tests/tagstore/insert.py|
4445
tests/txs_pagesize_tester.py|
4546
tests/txs_pagesize_tests.py

Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ WORKTREE_DIR ?= ../.graphsense-rest-old
1818
test: install-dev
1919
uv run pytest -x -rx -vv
2020

21+
# Remove the locally built Cassandra test image so that tests/conftest.py
# rebuilds it on the next test run. The leading '-' keeps the target from
# failing when the image does not exist (nothing to reset).
test-reset-cassandra-image:
	-docker rmi graphsense/cassandra-test:4.1.4
23+
2124
test-regression:
2225
@export SKIP_REST_CONTAINER_SETUP=True && uv run pytest -m "regression" -s
2326

@@ -97,4 +100,4 @@ tag-version: ensure-versions-alignment
97100
-git diff --exit-code && git diff --staged --exit-code && git tag -a v$(GS_REST_SERVICE_VERSIONM) -m 'Release v$(GS_REST_SERVICE_VERSION)' || (echo "Repo is dirty please commit first" && exit 1)
98101
git diff --exit-code && git diff --staged --exit-code && git tag -a v$(GS_REST_SERVICE_VERSION) -m 'Release v$(GS_REST_SERVICE_VERSION)' || (echo "Repo is dirty please commit first" && exit 1)
99102

100-
.PHONY: format lint test ensure-versions-alignment run-codegen serve serve-docker pre-commit install-dev tag-version generate-python-client build-docker test-migration setup-migration-worktree clean-migration-worktree serve-old serve-new test-regression
103+
.PHONY: format lint test ensure-versions-alignment run-codegen serve serve-docker pre-commit install-dev tag-version generate-python-client build-docker build-test-cassandra test-reset-cassandra-image test-migration setup-migration-worktree clean-migration-worktree serve-old serve-new test-regression

tests/cassandra/Dockerfile

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Pre-baked Cassandra image with GraphSense test schemas
# Build: docker build -t graphsense/cassandra-test:4.1.4 tests/cassandra/
FROM cassandra:4.1.4

# Copy schema initialization script
COPY init-schemas.py /init-schemas.py

# Install Python dependencies for schema init.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3 python3-pip && \
    pip3 install --no-cache-dir cassandra-driver requests && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Configure for fast single-node startup BEFORE first start
# This is critical - num_tokens is locked after first start
RUN sed -i 's/num_tokens: 16/num_tokens: 1/' /etc/cassandra/cassandra.yaml

# Set JVM options for fast startup
ENV JVM_EXTRA_OPTS="-Dcassandra.ring_delay_ms=100 -Dcassandra.skip_wait_for_gossip_to_settle=0"

# Start Cassandra once at build time, create the schemas, then stop it so the
# resulting data directory is baked into the image.
# NOTE(review): the base image is believed to declare /var/lib/cassandra as a
# VOLUME; data written there during the build is only kept by BuildKit, not by
# the legacy builder - confirm which builder is used.
RUN set -e && \
    echo "Starting Cassandra for schema initialization..." && \
    cassandra -R && \
    echo "Waiting for Cassandra to be ready..." && \
    until cqlsh -e "SELECT now() FROM system.local" 2>/dev/null; do sleep 1; done && \
    echo "Cassandra is ready, creating schemas..." && \
    python3 /init-schemas.py && \
    echo "Schemas created, stopping Cassandra..." && \
    # Flush memtables to disk before stopping so the schemas do not depend on
    # commit-log replay at container start.
    nodetool drain && \
    nodetool stopdaemon && \
    sleep 2 && \
    echo "Schema initialization complete."

# Clean up init script (no longer needed)
RUN rm /init-schemas.py

tests/cassandra/init-schemas.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env python3
2+
"""Initialize Cassandra with GraphSense test schemas at Docker build time."""
3+
4+
import sys
5+
6+
import requests
7+
from cassandra.cluster import Cluster
8+
9+
TAG = "master"
10+
SCHEMA_BASE = f"https://raw.githubusercontent.com/graphsense/graphsense-lib/{TAG}/src/graphsenselib/schema/resources/"
11+
12+
SCHEMA_MAPPING = {"btc": "utxo", "ltc": "utxo", "eth": "account", "trx": "account_trx"}
13+
SCHEMA_MAPPING_OVERRIDE = {("trx", "transformed"): "account"}
14+
15+
MAGIC_REPLACE_CONSTANT = "0x8BADF00D"
16+
MAGIC_REPLACE_CONSTANT2 = f"{MAGIC_REPLACE_CONSTANT}_REPLICATION_CONFIG"
17+
SIMPLE_REPLICATION_CONFIG = "{'class': 'SimpleStrategy', 'replication_factor': 1}"
18+
19+
20+
def get_schema_file(filename, timeout=30):
    """Download a schema file and return its content as text.

    Args:
        filename: Name of the schema file; it is appended to SCHEMA_BASE to
            form the download URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would hang the Docker build forever.

    Returns:
        The body of the HTTP response as a string.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    res = requests.get(SCHEMA_BASE + filename, timeout=timeout)
    # Fail loudly instead of feeding an HTML error page to Cassandra.
    res.raise_for_status()
    return res.text
24+
25+
26+
def main():
    """Create the GraphSense test keyspaces on the local Cassandra node.

    For every currency in SCHEMA_MAPPING and for both the "raw" and the
    "transformed" schema type, the matching schema file is downloaded, its
    placeholders are replaced by the keyspace name and a single-node
    replication config, and every statement in it is executed.

    Errors raised while downloading a schema or executing a statement
    propagate to the caller; the cluster connection is shut down either way.
    """
    cluster = Cluster(["127.0.0.1"])
    try:
        session = cluster.connect()

        # Several currencies share the same schema file; download each once.
        downloaded = {}

        for currency, schema_base in SCHEMA_MAPPING.items():
            for schema_type in ["raw", "transformed"]:
                schema_name = SCHEMA_MAPPING_OVERRIDE.get(
                    (currency, schema_type), schema_base
                )
                filename = f"{schema_type}_{schema_name}_schema.sql"
                keyspace = f"resttest_{currency}_{schema_type}"

                # Log progress during Docker build
                sys.stdout.write(f"Creating schema: {keyspace} from {filename}\n")
                sys.stdout.flush()

                if filename not in downloaded:
                    downloaded[filename] = get_schema_file(filename)

                # The longer placeholder must be replaced first because the
                # shorter one is a prefix of it.
                schema_str = (
                    downloaded[filename]
                    .replace(MAGIC_REPLACE_CONSTANT2, SIMPLE_REPLICATION_CONFIG)
                    .replace(MAGIC_REPLACE_CONSTANT, keyspace)
                )

                # NOTE(review): splitting on ";" assumes the schema files never
                # contain a semicolon inside a literal or comment - confirm.
                for stmt in schema_str.split(";"):
                    stmt = stmt.strip()
                    if stmt:
                        session.execute(stmt)

        sys.stdout.write("All schemas created successfully!\n")
    finally:
        # Release driver connections and threads even when a step failed.
        cluster.shutdown()
55+
56+
57+
if __name__ == "__main__":
58+
main()

tests/cassandra/insert.py

Lines changed: 18 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,31 @@
1-
import logging
21
import os
3-
from functools import cache
42
from pathlib import Path
53

6-
import requests
74
from cassandra.cluster import Cluster
85

96
DATA_DIR = Path(__file__).parent.resolve() / "data"
107

11-
TAG = "master"
12-
SCHEMA_BASE = f"https://raw.githubusercontent.com/graphsense/graphsense-lib/{TAG}/src/graphsenselib/schema/resources/"
13-
14-
SCHEMA_MAPPING = {"btc": "utxo", "ltc": "utxo", "eth": "account", "trx": "account_trx"}
15-
16-
SCHEMA_MAPPING_OVERRIDE = {("trx", "transformed"): "account"}
17-
18-
MAGIC_REPLACE_CONSTANT = "0x8BADF00D"
19-
MAGIC_REPLACE_CONSTANT2 = f"{MAGIC_REPLACE_CONSTANT}_REPLICATION_CONFIG"
20-
21-
SIMPLE_REPLICATION_CONFIG = "{'class': 'SimpleStrategy', 'replication_factor': 1}"
22-
23-
24-
@cache
25-
def get_schema_file(file: str):
26-
res = requests.get(SCHEMA_BASE + file)
27-
return res.text
28-
298

309
def load_test_data(host, port):
    """Load test data into pre-baked Cassandra (schemas already exist).

    Every regular file in DATA_DIR is treated as one table: the file name is
    used as the table name and each non-empty line is inserted as one JSON
    row. All inserts are submitted asynchronously and then awaited.

    Args:
        host: Address of the Cassandra node to connect to.
        port: Port of the Cassandra node.

    Raises:
        Whatever the driver raises for a failed connection or insert; the
        first failing insert is re-raised when its result is awaited.
    """
    cluster = Cluster([host], port=port)
    try:
        session = cluster.connect()

        # Collect all insert statements
        inserts = []
        for file_path in DATA_DIR.iterdir():
            if not file_path.is_file():
                continue
            table_name = os.path.basename(file_path)
            content = file_path.read_text()
            for line in content.split("\n"):
                line = line.strip()
                if line:
                    # NOTE(review): the line is interpolated verbatim, so a
                    # single quote inside the JSON would break the statement
                    # unless the data files already escape it - confirm.
                    inserts.append(f"INSERT INTO {table_name} JSON '{line}'")

        # Execute inserts concurrently using async Cassandra driver
        futures = [session.execute_async(stmt) for stmt in inserts]

        # Wait for all inserts to complete
        for future in futures:
            future.result()
    finally:
        # Release driver connections and threads once loading is done or
        # has failed; the cluster object is local to this function.
        cluster.shutdown()

tests/conftest.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import subprocess
12
from os import environ
3+
from pathlib import Path
24

5+
import docker
36
import pytest
47
from testcontainers.cassandra import CassandraContainer
58
from testcontainers.postgres import PostgresContainer
@@ -9,7 +12,30 @@
912
from tests.tagstore.insert import load_test_data as tags_load_test_data
1013

1114
postgres = PostgresContainer("postgres:16-alpine")
12-
cassandra = CassandraContainer("cassandra:4.1.4")
15+
16+
# Pre-baked Cassandra image with schemas and fast startup settings already configured
17+
# Build with: make build-test-cassandra
18+
# Baked-in optimizations: num_tokens=1, ring_delay_ms=100, skip_wait_for_gossip_to_settle=0
19+
CASSANDRA_TEST_IMAGE = environ.get(
20+
"CASSANDRA_TEST_IMAGE", "graphsense/cassandra-test:4.1.4"
21+
)
22+
23+
24+
def ensure_cassandra_image_exists():
    """Build Cassandra test image if it doesn't exist locally.

    Looks up CASSANDRA_TEST_IMAGE through the Docker API. When the image is
    missing, it is built with the ``docker`` CLI from the ``cassandra``
    directory next to this file.

    Raises:
        subprocess.CalledProcessError: If ``docker build`` exits non-zero.
    """
    client = docker.from_env()
    try:
        client.images.get(CASSANDRA_TEST_IMAGE)
    except docker.errors.ImageNotFound:
        dockerfile_path = Path(__file__).parent / "cassandra"
        subprocess.run(
            ["docker", "build", "-t", CASSANDRA_TEST_IMAGE, str(dockerfile_path)],
            check=True,
        )
    finally:
        # Close the client's HTTP session; it is only needed for the lookup.
        client.close()
35+
36+
37+
ensure_cassandra_image_exists()
38+
cassandra = CassandraContainer(CASSANDRA_TEST_IMAGE)
1339

1440

1541
@pytest.fixture(scope="session", autouse=True)

0 commit comments

Comments
 (0)