Skip to content

Commit 0af8f2d

Browse files
committed
string performance optimisation
1 parent 0b40af3 commit 0af8f2d

6 files changed

Lines changed: 149 additions & 130 deletions

File tree

src/main/java/com/dashjoin/jsonata/Functions.java

Lines changed: 38 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -770,14 +770,21 @@ public static String join(List<String> strs, String separator) {
770770
return String.join(separator, strs);
771771
}
772772

773+
private static final Pattern DOLLAR_DOLLAR = Pattern.compile("\\$\\$");
774+
private static final Pattern DOLLAR_WITHOUT_ESCAPE = Pattern.compile("([^\\\\]|^)\\$([^0-9^<])");
775+
private static final Pattern DOLLAR_AT_END = Pattern.compile("\\$$");
773776
static String safeReplacement(String in) {
774777
// In JSONata and in Java the $ in the replacement text usually starts the insertion of a capturing group
775778
// In order to replace a simple $ in Java you have to escape the $ with "\$"
776779
// in JSONata you do this with a '$$'
777-
// "\$" followed any character besides '<' and and digit into $ + this character
778-
return in.replaceAll("\\$\\$", "\\\\\\$")
779-
.replaceAll("([^\\\\]|^)\\$([^0-9^<])", "$1\\\\\\$$2")
780-
.replaceAll("\\$$", "\\\\\\$"); // allow $ at end
780+
// a "$" followed by any character other than '<' or a digit is escaped to "\$" + that character
781+
if (!in.contains("$")) {
782+
return in;
783+
}
784+
String result = DOLLAR_DOLLAR.matcher(in).replaceAll("\\\\\\$");
785+
result = DOLLAR_WITHOUT_ESCAPE.matcher(result).replaceAll("$1\\\\\\$$2");
786+
result = DOLLAR_AT_END.matcher(result).replaceAll("\\\\\\$");
787+
return result;
781788
}
782789

783790
/**
@@ -813,7 +820,7 @@ static String safeReplaceAll(String s, Pattern pattern, Object _replacement) {
813820
if (!msg.contains("No group")) throw e;
814821

815822
// Adjust replacement to remove the non-existing group
816-
String g = "" + msg.charAt(msg.length()-1);
823+
String g = String.valueOf(msg.charAt(msg.length()-1));
817824

818825
replacement = replacement.replace("$"+g, "");
819826
}
@@ -865,7 +872,7 @@ static String safeReplaceAllFn(String s, Pattern pattern, Object fn) {
865872

866873
/**
867874
* Safe replaceFirst
868-
*
875+
*
869876
* @param s
870877
* @param pattern
871878
* @param replacement
@@ -887,7 +894,7 @@ static String safeReplaceFirst(String s, Pattern pattern, String replacement) {
887894
if (!msg.contains("No group")) throw e;
888895

889896
// Adjust replacement to remove the non-existing group
890-
String g = "" + msg.charAt(msg.length()-1);
897+
String g = String.valueOf(msg.charAt(msg.length()-1));
891898

892899
replacement = replacement.replace("$"+g, "");
893900
}
@@ -909,10 +916,10 @@ public static String replace(String str, Object pattern, Object replacement, Int
909916
return safeReplaceAll(str, (Pattern)pattern, replacement);
910917
}
911918
} else {
912-
919+
913920
if (limit<0)
914921
throw new JException("Fourth argument of replace function must evaluate to a positive number", 0);
915-
922+
916923
for (int i=0; i<limit; i++)
917924
if (pattern instanceof String) {
918925
str = str.replaceFirst((String)pattern, (String)replacement);
@@ -963,6 +970,12 @@ public static String base64decode(String str) {
963970
}
964971
}
965972

973+
private static final Pattern PLUS = Pattern.compile("\\+");
974+
private static final Pattern PERCENT_21 = Pattern.compile("%21");
975+
private static final Pattern PERCENT_27 = Pattern.compile("%27");
976+
private static final Pattern PERCENT_28 = Pattern.compile("%28");
977+
private static final Pattern PERCENT_29 = Pattern.compile("%29");
978+
private static final Pattern PERCENT_7E = Pattern.compile("%7E");
966979
/**
967980
* Encode a string into a component for a url
968981
* @param {String} str - String to encode
@@ -975,14 +988,20 @@ public static String encodeUrlComponent(String str) {
975988
}
976989

977990
Utils.checkUrl(str);
978-
979-
return URLEncoder.encode(str, StandardCharsets.UTF_8)
980-
.replaceAll("\\+", "%20")
981-
.replaceAll("\\%21", "!")
982-
.replaceAll("\\%27", "'")
983-
.replaceAll("\\%28", "(")
984-
.replaceAll("\\%29", ")")
985-
.replaceAll("\\%7E", "~");
991+
992+
String encoded = URLEncoder.encode(str, StandardCharsets.UTF_8);
993+
994+
if (!encoded.contains("+") && !encoded.contains("%")) {
995+
return encoded;
996+
}
997+
998+
encoded = PLUS.matcher(encoded).replaceAll("%20");
999+
encoded = PERCENT_21.matcher(encoded).replaceAll("!");
1000+
encoded = PERCENT_27.matcher(encoded).replaceAll("'");
1001+
encoded = PERCENT_28.matcher(encoded).replaceAll("(");
1002+
encoded = PERCENT_29.matcher(encoded).replaceAll(")");
1003+
encoded = PERCENT_7E.matcher(encoded).replaceAll("~");
1004+
return encoded;
9861005
}
9871006

9881007
/**
@@ -997,7 +1016,7 @@ public static String encodeUrl(String str) {
9971016
}
9981017

9991018
Utils.checkUrl(str);
1000-
1019+
10011020
try {
10021021
// only encode query part: https://docs.jsonata.org/string-functions#encodeurl
10031022
URL url = new URL(str);
@@ -1083,7 +1102,7 @@ public static List<String> split(String str, Object pattern, Number limit) {
10831102
// $split("str", ""): Split string into characters
10841103
int l = limit!=null ? limit.intValue() : Integer.MAX_VALUE;
10851104
for (int i=0; i<str.length() && i<l; i++) {
1086-
result.add( ""+str.charAt(i) );
1105+
result.add(String.valueOf(str.charAt(i)));
10871106
}
10881107
} else {
10891108
// Quote separator string + preserve trailing empty strings (-1)

0 commit comments

Comments
 (0)