diff --git a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FieldType.java b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FieldType.java index ab2e76cb1..01d0440ae 100644 --- a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FieldType.java +++ b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FieldType.java @@ -59,6 +59,15 @@ private FieldType(int wt, DataType.ScalarType... aliases) { public int getWireType() { return _wireType; } + /** + * Whether fields of this type may use "packed" encoding when repeated + * (per protobuf spec, only scalar numeric/enum/boolean types can be packed; + * length-delimited types like String/Bytes/Message cannot). + */ + public boolean isPackable() { + return _wireType != WireType.LENGTH_PREFIXED; + } + public boolean usesZigZag() { return (this == VINT32_Z) || (this == VINT64_Z); } diff --git a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FileDescriptorSet.java b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FileDescriptorSet.java index 50834f1be..4b74d6b3b 100644 --- a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FileDescriptorSet.java +++ b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/FileDescriptorSet.java @@ -136,10 +136,14 @@ public static class FileDescriptorProto public ProtoFile.Syntax getSyntax() { - if (syntax == null) { - return ProtoFile.Syntax.PROTO_2; + // 01-Jul-2026, tatu: [dataformats-binary#134] Real `FileDescriptorProto.syntax` + // values (as written by protoc) are lowercase ("proto2"/"proto3"), but the + // enum constants are PROTO_2/PROTO_3: `valueOf(syntax)` would throw for any + // proto3-origin descriptor set. + if ("proto3".equals(syntax)) { + return ProtoFile.Syntax.PROTO_3; } - return ProtoFile.Syntax.valueOf(syntax); + return ProtoFile.Syntax.PROTO_2; } public void setPackage(String p) { _package = p; } diff --git a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/NativeProtobufSchema.java b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/NativeProtobufSchema.java index 9d7ad9be4..5ce0de2df 100644 --- a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/NativeProtobufSchema.java +++ b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/NativeProtobufSchema.java @@ -15,17 +15,27 @@ public class NativeProtobufSchema protected final String _name; protected final Collection _nativeTypes; + /** + * @since 2.21.5 [dataformats-binary#134] + */ + protected final boolean _isProto3; + protected volatile String[] _messageNames; protected NativeProtobufSchema(ProtoFile input) { - this(input.filePath(), input.typeElements()); + this(input.filePath(), input.typeElements(), input.syntax() == ProtoFile.Syntax.PROTO_3); + } + + protected NativeProtobufSchema(String name, Collection types) { + this(name, types, false); } - protected NativeProtobufSchema(String name, Collection types) + protected NativeProtobufSchema(String name, Collection types, boolean isProto3) { _name = name; _nativeTypes = types; + _isProto3 = isProto3; } public static NativeProtobufSchema construct(ProtoFile input) { @@ -64,7 +74,7 @@ public ProtobufSchema forType(String messageTypeName) +"') has no message type with name '"+messageTypeName+"': known types: " +getMessageNames()); } - return new ProtobufSchema(this, TypeResolver.resolve(_nativeTypes, msg)); + return new ProtobufSchema(this, TypeResolver.resolve(_nativeTypes, msg, _isProto3)); } /** @@ -78,7 +88,7 @@ public ProtobufSchema forFirstType() throw new IllegalArgumentException("Protobuf schema definition (name '"+_name +"') contains no message type definitions"); } - return new ProtobufSchema(this, TypeResolver.resolve(_nativeTypes, msg)); + return new ProtobufSchema(this, TypeResolver.resolve(_nativeTypes, msg, _isProto3)); } public List getMessageNames() { diff --git a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/ProtobufField.java b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/ProtobufField.java index 619b5ac1a..5c72ab07e 100644 --- a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/ProtobufField.java +++ b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/ProtobufField.java @@ -59,23 +59,36 @@ public class ProtobufField public final boolean isStdEnum; public ProtobufField(FieldElement nativeField, FieldType type) { - this(nativeField, type, null, null); + this(nativeField, type, false); + } + + /** + * @param isProto3 Whether enclosing schema uses proto3 syntax: affects default + * "packed" setting for repeated scalar fields when not explicitly specified + * (proto3 defaults to packed, proto2 to unpacked) + */ + public ProtobufField(FieldElement nativeField, FieldType type, boolean isProto3) { + this(nativeField, type, null, null, isProto3); } public ProtobufField(FieldElement nativeField, ProtobufMessage msg) { - this(nativeField, FieldType.MESSAGE, msg, null); + this(nativeField, FieldType.MESSAGE, msg, null, false); } public ProtobufField(FieldElement nativeField, ProtobufEnum et) { - this(nativeField, FieldType.ENUM, null, et); + this(nativeField, FieldType.ENUM, null, et, false); + } + + public ProtobufField(FieldElement nativeField, ProtobufEnum et, boolean isProto3) { + this(nativeField, FieldType.ENUM, null, et, isProto3); } public static ProtobufField unknownField() { - return new ProtobufField(null, FieldType.MESSAGE, null, null); + return new ProtobufField(null, FieldType.MESSAGE, null, null, false); } protected ProtobufField(FieldElement nativeField, FieldType type, - ProtobufMessage msg, ProtobufEnum et) + ProtobufMessage msg, ProtobufEnum et, boolean isProto3) { this.type = type; wireType = type.getWireType(); @@ -113,8 +126,15 @@ protected ProtobufField(FieldElement nativeField, FieldType type, * we can't use 'isPacked()' in 3.1.5 (and probably deprecated has same issue); * let's add a temporary workaround. */ - packed = _findBooleanOption(nativeField, "packed"); - deprecated = _findBooleanOption(nativeField, "deprecated"); + Boolean explicitPacked = _findBooleanOptionValue(nativeField, "packed"); + if (explicitPacked != null) { + packed = explicitPacked.booleanValue(); + } else { + // 01-Jul-2026: [dataformats-binary#134] proto3 defaults repeated + // scalar/enum fields to packed encoding unless overridden + packed = repeated && isProto3 && type.isPackable(); + } + deprecated = Boolean.TRUE.equals(_findBooleanOptionValue(nativeField, "deprecated")); // 13-Apr-2017, tatu: [databind#79] Need to write length-prefixed for packed arrays if (repeated && packed) { @@ -127,18 +147,18 @@ protected ProtobufField(FieldElement nativeField, FieldType type, isObject = (type == FieldType.MESSAGE); } - private static boolean _findBooleanOption(FieldElement f, String key) + private static Boolean _findBooleanOptionValue(FieldElement f, String key) { for (OptionElement opt : f.options()) { if (key.equals(opt.name())) { Object val = opt.value(); if (val instanceof Boolean) { - return ((Boolean) val).booleanValue(); + return (Boolean) val; } - return "true".equals(String.valueOf(val).trim()); + return Boolean.valueOf("true".equals(String.valueOf(val).trim())); } } - return false; + return null; } public void assignMessageType(ProtobufMessage msgType) { diff --git a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/TypeResolver.java b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/TypeResolver.java index ca0099ced..ff78fb5a1 100644 --- a/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/TypeResolver.java +++ b/protobuf/src/main/java/com/fasterxml/jackson/dataformat/protobuf/schema/TypeResolver.java @@ -40,12 +40,21 @@ public class TypeResolver */ private Map _resolvedMessageTypes; + /** + * Whether enclosing schema (single .proto file) uses proto3 syntax: affects + * default "packed" setting for repeated scalar/enum fields. + * + * @since 2.21.5 [dataformats-binary#134] + */ + private final boolean _isProto3; + protected TypeResolver(TypeResolver p, String name, Map declaredMsgs, - Map enums) + Map enums, boolean isProto3) { _parent = p; _contextName = name; _enumTypes = enums; + _isProto3 = isProto3; if (declaredMsgs == null) { declaredMsgs = Collections.emptyMap(); } @@ -58,21 +67,29 @@ protected TypeResolver(TypeResolver p, String name, Map d * types it depends on. */ public static ProtobufMessage resolve(Collection nativeTypes, MessageElement rawType) { - final TypeResolver rootR = construct(null, null, nativeTypes); + return resolve(nativeTypes, rawType, false); + } + + /** + * @since 2.21.5 [dataformats-binary#134] + */ + public static ProtobufMessage resolve(Collection nativeTypes, MessageElement rawType, + boolean isProto3) { + final TypeResolver rootR = construct(null, null, nativeTypes, isProto3); // Important: parent context for "root types", but child context for nested; further, // resolution happens in "child" context to allow proper referencing - return TypeResolver.construct(rootR, rawType.name(), rawType.nestedElements()) + return TypeResolver.construct(rootR, rawType.name(), rawType.nestedElements(), isProto3) ._resolve(rawType); } protected ProtobufMessage resolve(TypeResolver parent, MessageElement rawType) { - return TypeResolver.construct(this, rawType.name(), rawType.nestedElements()) + return TypeResolver.construct(this, rawType.name(), rawType.nestedElements(), _isProto3) ._resolve(rawType); } protected static TypeResolver construct(TypeResolver parent, String localName, - Collection nativeTypes) + Collection nativeTypes, boolean isProto3) { Map declaredMsgs = null; Map declaredEnums = new LinkedHashMap<>(); @@ -92,7 +109,7 @@ protected static TypeResolver construct(TypeResolver parent, String localName, } } // no other known types? } - return new TypeResolver(parent, localName, declaredMsgs, declaredEnums); + return new TypeResolver(parent, localName, declaredMsgs, declaredEnums, isProto3); } protected void addEnumType(String name, ProtobufEnum enumType) { @@ -125,6 +142,17 @@ protected static ProtobufEnum constructEnum(EnumElement nativeEnum) protected ProtobufMessage _resolve(MessageElement rawType) { List rawFields = rawType.fields(); + List oneOfs = rawType.oneOfs(); + // 01-Jul-2026, tatu: [dataformats-binary#134] Fields declared inside a + // `oneof` block live in a separate list from regular fields and were + // silently dropped during resolution; merge them in so they're not lost. + if (!oneOfs.isEmpty()) { + List merged = new ArrayList(rawFields); + for (OneOfElement oneOf : oneOfs) { + merged.addAll(oneOf.fields()); + } + rawFields = merged; + } ProtobufField[] resolvedFields = new ProtobufField[rawFields.size()]; ProtobufMessage message = new ProtobufMessage(rawType.name(), resolvedFields); @@ -142,7 +170,7 @@ protected ProtobufMessage _resolve(MessageElement rawType) ProtobufField pbf; if (type != null) { // simple type - pbf = new ProtobufField(f, type); + pbf = new ProtobufField(f, type, _isProto3); } else if (fieldType instanceof DataType.NamedType) { final String typeStr = ((DataType.NamedType) fieldType).name(); @@ -260,7 +288,7 @@ private ProtobufField _findLocalResolved(FieldElement nativeField, String typeSt } ProtobufEnum et = _enumTypes.get(typeStr); if (et != null) { - return new ProtobufField(nativeField, et); + return new ProtobufField(nativeField, et, _isProto3); } return null; } diff --git a/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/OneofFieldResolutionTest.java b/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/OneofFieldResolutionTest.java new file mode 100644 index 000000000..62d47b125 --- /dev/null +++ b/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/OneofFieldResolutionTest.java @@ -0,0 +1,56 @@ +package com.fasterxml.jackson.dataformat.protobuf; + +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.dataformat.protobuf.schema.ProtobufSchema; +import com.fasterxml.jackson.dataformat.protobuf.schema.ProtobufSchemaLoader; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +// [dataformats-binary#134]: fields declared inside a `oneof` block live in a +// separate list (`MessageElement.oneOfs()`) from regular fields +// (`MessageElement.fields()`); TypeResolver only ever resolved the latter, +// so `oneof` member fields were silently dropped from the schema -- no +// error, they simply couldn't be read or written. +public class OneofFieldResolutionTest extends ProtobufTestBase +{ + private final ProtobufMapper MAPPER = newObjectMapper(); + + private final static String SCHEMA_STR = "message t {\n" + + " oneof choice {\n" + + " string a = 1;\n" + + " int32 b = 2;\n" + + " }\n" + + " optional string other = 3;\n" + + "}\n"; + + @Test + public void testOneofFieldsAreResolved() throws Exception + { + ProtobufSchema schema = ProtobufSchemaLoader.std.parse(SCHEMA_STR); + + assertEquals(3, schema.getRootType().getFieldCount()); + assertNotNull(schema.getRootType().field("a")); + assertNotNull(schema.getRootType().field("b")); + assertNotNull(schema.getRootType().field("other")); + } + + @Test + public void testOneofFieldsRoundTrip() throws Exception + { + ProtobufSchema schema = ProtobufSchemaLoader.std.parse(SCHEMA_STR); + + ObjectNode input = MAPPER.createObjectNode(); + input.put("a", "value-for-a"); + input.put("other", "other-value"); + + byte[] bytes = MAPPER.writer(schema).writeValueAsBytes(input); + JsonNode result = MAPPER.readerFor(JsonNode.class).with(schema).readValue(bytes); + + assertEquals("value-for-a", result.get("a").asText()); + assertEquals("other-value", result.get("other").asText()); + } +} diff --git a/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/Proto3PackedDefault134Test.java b/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/Proto3PackedDefault134Test.java new file mode 100644 index 000000000..4dd9a39dc --- /dev/null +++ b/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/Proto3PackedDefault134Test.java @@ -0,0 +1,119 @@ +package com.fasterxml.jackson.dataformat.protobuf; + +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.dataformat.protobuf.schema.ProtobufSchema; +import com.fasterxml.jackson.dataformat.protobuf.schema.ProtobufSchemaLoader; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +// [dataformats-binary#134]: proto3 defaults repeated scalar/enum fields to +// "packed" wire encoding unless explicitly overridden; this library only +// ever honored an explicit `[packed=...]` option, causing decode failures +// for proto3-encoded content produced by real protoc-based implementations. +public class Proto3PackedDefault134Test extends ProtobufTestBase +{ + private final ProtobufMapper MAPPER = newObjectMapper(); + + static class IntArray { + public int[] f; + + protected IntArray() { } + public IntArray(int... v) { f = v; } + } + + @Test + public void testProto3RepeatedScalarDefaultsToPacked() throws Exception + { + final String SCHEMA_STR = "syntax = \"proto3\";\n" + + "message t {\n" + + " repeated uint32 f = 1;\n" + + "}\n"; + // As produced by a real proto3 encoder: packed (length-delimited) encoding + final byte[] pb = { 0xa, 0x3, 0x64, (byte) 0xc8, 0x1 }; // f = [100, 200], packed + + ProtobufSchema schema = ProtobufSchemaLoader.std.parse(SCHEMA_STR); + JsonNode t = MAPPER.readerFor(JsonNode.class).with(schema).readValue(pb); + + assertEquals(2, t.get("f").size()); + assertEquals(100, t.get("f").get(0).asInt()); + assertEquals(200, t.get("f").get(1).asInt()); + } + + @Test + public void testProto3ExplicitUnpackedOverrideStillHonored() throws Exception + { + final String SCHEMA_STR = "syntax = \"proto3\";\n" + + "message t {\n" + + " repeated uint32 f = 1 [packed=false];\n" + + "}\n"; + // unpacked: each value gets its own tag + final byte[] pb = { 0x8, 0x64, 0x8, (byte) 0xc8, 0x1 }; // f = [100, 200], unpacked + + ProtobufSchema schema = ProtobufSchemaLoader.std.parse(SCHEMA_STR); + JsonNode t = MAPPER.readerFor(JsonNode.class).with(schema).readValue(pb); + + assertEquals(2, t.get("f").size()); + assertEquals(100, t.get("f").get(0).asInt()); + assertEquals(200, t.get("f").get(1).asInt()); + } + + @Test + public void testProto2RepeatedScalarStaysUnpackedByDefault() throws Exception + { + final String SCHEMA_STR = "syntax = \"proto2\";\n" + + "message t {\n" + + " repeated uint32 f = 1;\n" + + "}\n"; + // unpacked: proto2 default, unaffected by this fix + final byte[] pb = { 0x8, 0x64, 0x8, (byte) 0xc8, 0x1 }; // f = [100, 200], unpacked + + ProtobufSchema schema = ProtobufSchemaLoader.std.parse(SCHEMA_STR); + JsonNode t = MAPPER.readerFor(JsonNode.class).with(schema).readValue(pb); + + assertEquals(2, t.get("f").size()); + assertEquals(100, t.get("f").get(0).asInt()); + assertEquals(200, t.get("f").get(1).asInt()); + } + + @Test + public void testNoSyntaxDeclarationStaysUnpackedByDefault() throws Exception + { + // no `syntax = ...;` line at all: must behave exactly as before (unpacked default) + final String SCHEMA_STR = "message t {\n" + + " repeated uint32 f = 1;\n" + + "}\n"; + final byte[] pb = { 0x8, 0x64, 0x8, (byte) 0xc8, 0x1 }; // f = [100, 200], unpacked + + ProtobufSchema schema = ProtobufSchemaLoader.std.parse(SCHEMA_STR); + JsonNode t = MAPPER.readerFor(JsonNode.class).with(schema).readValue(pb); + + assertEquals(2, t.get("f").size()); + assertEquals(100, t.get("f").get(0).asInt()); + assertEquals(200, t.get("f").get(1).asInt()); + } + + @Test + public void testProto3WriteDefaultsToPacked() throws Exception + { + final String SCHEMA_STR = "syntax = \"proto3\";\n" + + "message t {\n" + + " repeated uint32 f = 1;\n" + + "}\n"; + ProtobufSchema schema = ProtobufSchemaLoader.std.parse(SCHEMA_STR); + ObjectWriter w = MAPPER.writer(schema); + byte[] bytes = w.writeValueAsBytes(new IntArray(100, 200)); + + // packed: 1 byte tag, 1 byte length, 3 bytes for 100 (0x64) + 200 (0xc8,0x01) = 5 + assertEquals(5, bytes.length); + assertEquals(0xa, bytes[0]); // field 1, length-prefixed wire type + + // round-trip through the same schema + JsonNode t = MAPPER.readerFor(JsonNode.class).with(schema).readValue(bytes); + assertEquals(2, t.get("f").size()); + assertEquals(100, t.get("f").get(0).asInt()); + assertEquals(200, t.get("f").get(1).asInt()); + } +} diff --git a/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/schema/FileDescriptorSetProto3Test.java b/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/schema/FileDescriptorSetProto3Test.java new file mode 100644 index 000000000..9236fee74 --- /dev/null +++ b/protobuf/src/test/java/com/fasterxml/jackson/dataformat/protobuf/schema/FileDescriptorSetProto3Test.java @@ -0,0 +1,55 @@ +package com.fasterxml.jackson.dataformat.protobuf.schema; + +import org.junit.jupiter.api.Test; + +import com.fasterxml.jackson.dataformat.protobuf.ProtobufTestBase; +import com.fasterxml.jackson.dataformat.protobuf.schema.FileDescriptorSet.DescriptorProto; +import com.fasterxml.jackson.dataformat.protobuf.schema.FileDescriptorSet.FieldDescriptorProto; +import com.fasterxml.jackson.dataformat.protobuf.schema.FileDescriptorSet.FileDescriptorProto; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +// [dataformats-binary#134]: `FileDescriptorProto.getSyntax()` crashed on real +// (lowercase) protoc-produced "proto3" syntax strings, since the enum +// constants are PROTO_2/PROTO_3, not proto2/proto3. +public class FileDescriptorSetProto3Test extends ProtobufTestBase +{ + private FileDescriptorProto messageWithRepeatedInt32(String syntax) + { + FieldDescriptorProto f = new FieldDescriptorProto(); + f.name = "values"; + f.number = 1; + f.label = FieldDescriptorProto.Label.LABEL_REPEATED; + f.type = FieldDescriptorProto.Type.TYPE_INT32; + + DescriptorProto msg = new DescriptorProto(); + msg.name = "Msg"; + msg.field = new FieldDescriptorProto[] { f }; + + FileDescriptorProto fdp = new FileDescriptorProto(); + fdp.name = "test.proto"; + fdp.setPackage("test"); + fdp.syntax = syntax; + fdp.message_type = new DescriptorProto[] { msg }; + return fdp; + } + + @Test + public void testProto3SyntaxStringDoesNotCrashAndDefaultsToPacked() throws Exception + { + FileDescriptorSet fds = new FileDescriptorSet( + new FileDescriptorProto[] { messageWithRepeatedInt32("proto3") }); + ProtobufSchema schema = fds.schemaFor("Msg"); + assertTrue(schema.getRootType().field("values").packed); + } + + @Test + public void testProto2SyntaxStringStaysUnpacked() throws Exception + { + FileDescriptorSet fds = new FileDescriptorSet( + new FileDescriptorProto[] { messageWithRepeatedInt32("proto2") }); + ProtobufSchema schema = fds.schemaFor("Msg"); + assertFalse(schema.getRootType().field("values").packed); + } +} diff --git a/release-notes/VERSION-2.x b/release-notes/VERSION-2.x index c8f7e7a32..2e6b13f0f 100644 --- a/release-notes/VERSION-2.x +++ b/release-notes/VERSION-2.x @@ -14,6 +14,16 @@ Active maintainers: === Releases === ------------------------------------------------------------------------ +2.21.5 (not yet released) + +#134: (protobuf) Repeated fields incorrectly decoded for proto3 schemas + (reported by @vogt31337) +- (protobuf) Fields declared inside a `oneof` block were silently dropped + during schema resolution (found while investigating #134) +- (protobuf) `FileDescriptorSet`-based schema loading crashed on proto3 + descriptor sets due to a syntax-string/enum-constant mismatch + (found while investigating #134) + 2.21.4 (28-May-2026) #691: (cbor) Add parameterized tests covering all ASCII-optimization exit paths in CBORParser