diff --git a/sdks/java/src/main/java/org/byteveda/taskito/errors/ProxyException.java b/sdks/java/src/main/java/org/byteveda/taskito/errors/ProxyException.java new file mode 100644 index 00000000..c8c4de45 --- /dev/null +++ b/sdks/java/src/main/java/org/byteveda/taskito/errors/ProxyException.java @@ -0,0 +1,18 @@ +package org.byteveda.taskito.errors; + +import org.byteveda.taskito.TaskitoException; + +/** + * A non-serializable value could not be turned into a {@code ProxyRef}, or a ref + * could not be reconstructed — no handler, a signature mismatch, or a value + * outside an allowlist. + */ +public class ProxyException extends TaskitoException { + public ProxyException(String message) { + super(message); + } + + public ProxyException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/sdks/java/src/main/java/org/byteveda/taskito/proxies/FileProxyHandler.java b/sdks/java/src/main/java/org/byteveda/taskito/proxies/FileProxyHandler.java new file mode 100644 index 00000000..02d8d1ef --- /dev/null +++ b/sdks/java/src/main/java/org/byteveda/taskito/proxies/FileProxyHandler.java @@ -0,0 +1,100 @@ +package org.byteveda.taskito.proxies; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import org.byteveda.taskito.errors.ProxyException; + +/** + * Proxies a {@link File} by its absolute path. An optional allowlist of root + * directories is enforced on reconstruct, so a tampered or hostile ref cannot + * resolve to a path outside them (an empty allowlist permits any path). + */ +public final class FileProxyHandler implements ProxyHandler { + private final List allowedRoots; + + public FileProxyHandler() { + this(List.of()); + } + + public FileProxyHandler(List allowedRoots) { + List roots = new ArrayList<>(allowedRoots.size()); + for (Path root : allowedRoots) { + // Resolve each root to its real path so containment is checked against + // the true filesystem location, not a symlinked alias. + roots.add(realPath(root.toAbsolutePath().normalize())); + } + this.allowedRoots = List.copyOf(roots); + } + + @Override + public String id() { + return "file"; + } + + @Override + public boolean handles(Object value) { + return value instanceof File; + } + + @Override + public Map deconstruct(File value) { + return Map.of("path", value.getAbsolutePath()); + } + + @Override + public File reconstruct(Map reference) { + if (reference == null) { + throw new ProxyException("file proxy ref has no reference"); + } + Object path = reference.get("path"); + if (!(path instanceof String)) { + throw new ProxyException("file proxy ref missing 'path'"); + } + Path resolved = Paths.get((String) path).toAbsolutePath().normalize(); + if (!allowed(resolved)) { + throw new ProxyException("file path not in allowlist: " + resolved); + } + return resolved.toFile(); + } + + private boolean allowed(Path path) { + if (allowedRoots.isEmpty()) { + return true; + } + Path real = realPath(path); + for (Path root : allowedRoots) { + if (real.startsWith(root)) { + return true; + } + } + return false; + } + + /** + * The real (symlink-resolved) path. Since the target may not exist yet, resolve + * the nearest existing ancestor to its real path — collapsing any symlinked + * ancestor — then re-append the remaining segments. A path whose ancestors do + * not exist yet has no symlink to hide behind, so its normalized form stands. + */ + private static Path realPath(Path path) { + Path existing = path; + while (existing != null && !Files.exists(existing)) { + existing = existing.getParent(); + } + if (existing == null) { + return path; + } + try { + Path realExisting = existing.toRealPath(); + return realExisting.resolve(existing.relativize(path)).normalize(); + } catch (IOException e) { + throw new ProxyException("failed to resolve real path for " + path, e); + } + } +} diff --git a/sdks/java/src/main/java/org/byteveda/taskito/proxies/Proxies.java b/sdks/java/src/main/java/org/byteveda/taskito/proxies/Proxies.java new file mode 100644 index 00000000..98a1f228 --- /dev/null +++ b/sdks/java/src/main/java/org/byteveda/taskito/proxies/Proxies.java @@ -0,0 +1,95 @@ +package org.byteveda.taskito.proxies; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.Base64; +import java.util.LinkedHashMap; +import java.util.Map; +import javax.crypto.Mac; +import javax.crypto.spec.SecretKeySpec; +import org.byteveda.taskito.errors.ProxyException; + +/** + * Registry that deconstructs resources into signed {@link ProxyRef}s and + * reconstructs them. Construct with an HMAC key (shared by producer and worker), + * register a {@link ProxyHandler} per resource type, then + * {@link #deconstruct(Object)} on the producer and {@link #reconstruct(ProxyRef)} + * (or {@link #resolve(ProxyRef)}) on the worker. + */ +public final class Proxies { + private static final String ALGORITHM = "HmacSHA256"; + + private final Map> handlers = new LinkedHashMap<>(); + private final byte[] key; + private final ObjectMapper canonical = + new ObjectMapper().configure(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS, true); + + public Proxies(byte[] hmacKey) { + this.key = hmacKey.clone(); + } + + /** Register a handler under its non-null, unique id; returns {@code this}. */ + public Proxies register(ProxyHandler handler) { + String id = handler.id(); + if (id == null) { + throw new ProxyException("proxy handler id must not be null"); + } + // Fail fast on a duplicate: silently overwriting would let a producer and + // worker disagree on what a given ProxyRef's handler id means. + if (handlers.putIfAbsent(id, handler) != null) { + throw new ProxyException("proxy handler '" + id + "' is already registered"); + } + return this; + } + + /** Deconstruct {@code value} into a signed ref; throws if no handler accepts it. */ + @SuppressWarnings("unchecked") + public ProxyRef deconstruct(Object value) { + if (value == null) { + throw new ProxyException("cannot deconstruct null"); + } + for (ProxyHandler handler : handlers.values()) { + if (handler.handles(value)) { + Map reference = ((ProxyHandler) handler).deconstruct(value); + return new ProxyRef(handler.id(), reference, sign(handler.id(), reference)); + } + } + throw new ProxyException("no proxy handler for " + value.getClass().getName()); + } + + /** Verify a ref's signature and reconstruct the resource. */ + @SuppressWarnings("unchecked") + public Object reconstruct(ProxyRef ref) { + ProxyHandler handler = (ProxyHandler) handlers.get(ref.handler()); + if (handler == null) { + throw new ProxyException("unknown proxy handler '" + ref.handler() + "'"); + } + byte[] expected = sign(ref.handler(), ref.reference()).getBytes(StandardCharsets.UTF_8); + byte[] actual = (ref.signature() == null ? "" : ref.signature()).getBytes(StandardCharsets.UTF_8); + if (!MessageDigest.isEqual(expected, actual)) { + throw new ProxyException("proxy signature mismatch for handler '" + ref.handler() + "'"); + } + return handler.reconstruct(ref.reference()); + } + + /** {@link #reconstruct(ProxyRef)} cast to the caller's type. */ + @SuppressWarnings("unchecked") + public T resolve(ProxyRef ref) { + return (T) reconstruct(ref); + } + + private String sign(String handlerId, Map reference) { + try { + Mac mac = Mac.getInstance(ALGORITHM); + mac.init(new SecretKeySpec(key, ALGORITHM)); + mac.update(handlerId.getBytes(StandardCharsets.UTF_8)); + mac.update((byte) '\n'); + mac.update(canonical.writeValueAsBytes(reference)); + return Base64.getEncoder().encodeToString(mac.doFinal()); + } catch (Exception e) { + throw new ProxyException("failed to sign proxy ref", e); + } + } +} diff --git a/sdks/java/src/main/java/org/byteveda/taskito/proxies/ProxyHandler.java b/sdks/java/src/main/java/org/byteveda/taskito/proxies/ProxyHandler.java new file mode 100644 index 00000000..89fde023 --- /dev/null +++ b/sdks/java/src/main/java/org/byteveda/taskito/proxies/ProxyHandler.java @@ -0,0 +1,24 @@ +package org.byteveda.taskito.proxies; + +import java.util.Map; + +/** + * Deconstructs a non-serializable resource of type {@code T} into a serializable + * reference, and reconstructs it on the worker. Register handlers with a + * {@link Proxies} registry. + * + * @param the resource type this handler proxies + */ +public interface ProxyHandler { + /** Stable id stored in the {@link ProxyRef} and used to find this handler on the worker. */ + String id(); + + /** Whether this handler can proxy {@code value}. */ + boolean handles(Object value); + + /** Reduce {@code value} to a serializable reference (e.g. a file path, a config map). */ + Map deconstruct(T value); + + /** Rebuild the resource from a reference produced by {@link #deconstruct}. */ + T reconstruct(Map reference); +} diff --git a/sdks/java/src/main/java/org/byteveda/taskito/proxies/ProxyRef.java b/sdks/java/src/main/java/org/byteveda/taskito/proxies/ProxyRef.java new file mode 100644 index 00000000..332e43fd --- /dev/null +++ b/sdks/java/src/main/java/org/byteveda/taskito/proxies/ProxyRef.java @@ -0,0 +1,16 @@ +package org.byteveda.taskito.proxies; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import java.util.Map; + +/** + * A serializable, signed reference to a non-serializable resource. Carry it in a + * job payload; the worker reconstructs the resource with + * {@link Proxies#reconstruct}. + * + * @param handler the {@link ProxyHandler#id()} that produced (and reconstructs) it + * @param reference the handler's serializable reference data + * @param signature an HMAC over {@code handler + reference}, verified on reconstruct + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public record ProxyRef(String handler, Map reference, String signature) {} diff --git a/sdks/java/src/main/java/org/byteveda/taskito/proxies/package-info.java b/sdks/java/src/main/java/org/byteveda/taskito/proxies/package-info.java new file mode 100644 index 00000000..1653f360 --- /dev/null +++ b/sdks/java/src/main/java/org/byteveda/taskito/proxies/package-info.java @@ -0,0 +1,14 @@ +/** + * Pass non-serializable resources through a job payload by reference. + * + *

Java's typed, JSON-serialized payloads don't lend themselves to Python's + * implicit argument proxying, so this is explicit: deconstruct a value into a + * signed, serializable {@link org.byteveda.taskito.proxies.ProxyRef} on the + * producer ({@link org.byteveda.taskito.proxies.Proxies#deconstruct}), carry it + * in the payload, and reconstruct it in the handler + * ({@link org.byteveda.taskito.proxies.Proxies#reconstruct}). Refs are + * HMAC-signed; a {@link org.byteveda.taskito.proxies.ProxyHandler} per resource + * type does the (de)construction, with an allowlist where it matters + * (e.g. {@link org.byteveda.taskito.proxies.FileProxyHandler}). + */ +package org.byteveda.taskito.proxies; diff --git a/sdks/java/src/test/java/org/byteveda/taskito/ProxyTest.java b/sdks/java/src/test/java/org/byteveda/taskito/ProxyTest.java new file mode 100644 index 00000000..067ce6c4 --- /dev/null +++ b/sdks/java/src/test/java/org/byteveda/taskito/ProxyTest.java @@ -0,0 +1,96 @@ +package org.byteveda.taskito; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.File; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Map; +import org.byteveda.taskito.errors.ProxyException; +import org.byteveda.taskito.proxies.FileProxyHandler; +import org.byteveda.taskito.proxies.Proxies; +import org.byteveda.taskito.proxies.ProxyRef; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +class ProxyTest { + + private static final byte[] KEY = "proxy-secret-key".getBytes(); + private final ObjectMapper json = new ObjectMapper(); + + @Test + void roundTripsFileAcrossTheWire(@TempDir Path dir) throws Exception { + Proxies proxies = new Proxies(KEY).register(new FileProxyHandler()); + File file = dir.resolve("data.txt").toFile(); + + ProxyRef ref = proxies.deconstruct(file); + ProxyRef onWire = json.readValue(json.writeValueAsBytes(ref), ProxyRef.class); // simulate serialization + File reconstructed = proxies.resolve(onWire); + + assertEquals(file.getAbsolutePath(), reconstructed.getAbsolutePath()); + } + + @Test + void rejectsTamperedRef(@TempDir Path dir) { + Proxies proxies = new Proxies(KEY).register(new FileProxyHandler()); + ProxyRef ref = proxies.deconstruct(dir.resolve("a").toFile()); + ProxyRef tampered = new ProxyRef(ref.handler(), Map.of("path", "/etc/passwd"), ref.signature()); + + assertThrows(ProxyException.class, () -> proxies.reconstruct(tampered)); + } + + @Test + void enforcesAllowlist(@TempDir Path dir) { + Proxies proxies = new Proxies(KEY).register(new FileProxyHandler(List.of(dir))); + + File inside = dir.resolve("ok.txt").toFile(); + File back = proxies.resolve(proxies.deconstruct(inside)); + assertEquals(inside.getAbsolutePath(), back.getAbsolutePath()); + + File outside = dir.getParent().resolve("outside.txt").toFile(); + ProxyRef ref = proxies.deconstruct(outside); + assertThrows(ProxyException.class, () -> proxies.reconstruct(ref)); + } + + @Test + void allowlistRejectsSymlinkedAncestorEscape(@TempDir Path dir) throws Exception { + Path allowed = Files.createDirectory(dir.resolve("allowed")); + Path secret = Files.createDirectory(dir.resolve("secret")); + Files.writeString(secret.resolve("data.txt"), "top secret"); + // A symlink inside the allowed root pointing at the secret dir: lexically + // under the allowlist, but its real target is not. + Path link = allowed.resolve("link"); + Files.createSymbolicLink(link, secret); + + Proxies proxies = new Proxies(KEY).register(new FileProxyHandler(List.of(allowed))); + ProxyRef ref = proxies.deconstruct(link.resolve("data.txt").toFile()); + assertThrows(ProxyException.class, () -> proxies.reconstruct(ref)); + } + + @Test + void rejectsValueWithNoHandler() { + Proxies proxies = new Proxies(KEY); + assertThrows(ProxyException.class, () -> proxies.deconstruct("not proxyable")); + } + + @Test + void rejectsUnknownHandlerOnReconstruct() { + Proxies proxies = new Proxies(KEY).register(new FileProxyHandler()); + assertThrows(ProxyException.class, () -> proxies.reconstruct(new ProxyRef("nope", Map.of(), "sig"))); + } + + @Test + void rejectsNullValueOnDeconstruct() { + Proxies proxies = new Proxies(KEY).register(new FileProxyHandler()); + assertThrows(ProxyException.class, () -> proxies.deconstruct(null)); + } + + @Test + void rejectsDuplicateHandlerId() { + Proxies proxies = new Proxies(KEY).register(new FileProxyHandler()); + assertThrows(ProxyException.class, () -> proxies.register(new FileProxyHandler())); + } +}