From 9e5a7927dd48ac7ba23964fdf675e9080ff9f611 Mon Sep 17 00:00:00 2001 From: alhudz Date: Sat, 20 Jun 2026 14:47:57 +0530 Subject: [PATCH] reject sign characters in UnicodeUnescaper hex values --- .../lang3/text/translate/UnicodeUnescaper.java | 5 +++++ .../text/translate/UnicodeUnescaperTest.java | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/src/main/java/org/apache/commons/lang3/text/translate/UnicodeUnescaper.java b/src/main/java/org/apache/commons/lang3/text/translate/UnicodeUnescaper.java index bf9c1723c2a..c6a2e0939c9 100644 --- a/src/main/java/org/apache/commons/lang3/text/translate/UnicodeUnescaper.java +++ b/src/main/java/org/apache/commons/lang3/text/translate/UnicodeUnescaper.java @@ -59,6 +59,11 @@ public int translate(final CharSequence input, final int index, final Writer out // Get 4 hex digits final CharSequence unicode = input.subSequence(index + i, index + i + 4); + final char firstChar = unicode.charAt(0); + if (firstChar == '+' || firstChar == '-') { + // Integer.parseInt accepts a leading sign, but a Unicode value is unsigned hex. + throw new IllegalArgumentException("Sign character in unicode value: '" + unicode + "'"); + } try { final int value = Integer.parseInt(unicode.toString(), 16); out.write((char) value); diff --git a/src/test/java/org/apache/commons/lang3/text/translate/UnicodeUnescaperTest.java b/src/test/java/org/apache/commons/lang3/text/translate/UnicodeUnescaperTest.java index ca4b6e0e863..20290c5b879 100644 --- a/src/test/java/org/apache/commons/lang3/text/translate/UnicodeUnescaperTest.java +++ b/src/test/java/org/apache/commons/lang3/text/translate/UnicodeUnescaperTest.java @@ -40,6 +40,23 @@ void testLessThanFour() { "A lack of digits in a Unicode escape sequence failed to throw an exception"); } + @Test + void testSignedValue() { + final UnicodeUnescaper uu = new UnicodeUnescaper(); + + // Integer.parseInt accepts a leading sign, so these used to decode to a bogus char instead of throwing. + assertThrows( + IllegalArgumentException.class, + () -> uu.translate("\\u-047"), + "A signed Unicode escape sequence failed to throw an exception"); + assertThrows( + IllegalArgumentException.class, + () -> uu.translate("\\u++0047"), + "A signed Unicode escape sequence failed to throw an exception"); + // The documented u+ notation is still accepted. + assertEquals("G", uu.translate("\\u+0047"), "Failed to unescape Unicode characters with 'u+' notation"); + } + // Requested in LANG-507 @Test void testUPlus() {