diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java new file mode 100644 index 00000000000..7acb31353c5 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/StringReplacementBenchmark.java @@ -0,0 +1,85 @@ +package datadog.trace.util; + +import java.util.regex.Pattern; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * For simple replacements, Strings.replaceAll out performs String.replaceAll and + * regex.Matcher.replaceAll by 3x. Strings.replaceAll also requires less allocation. + * + *
+ * <p>When pattern matching is needed, compiling the regex to a Pattern slightly improves overhead,
+ * but dramatically reduces memory allocation to 1/4x of String.replaceAll.
+ *
+ * <pre>
+ * MacBook M1 with 8 threads (Java 21)
+ *
+ * Benchmark                                                    Mode  Cnt         Score         Error   Units
+ * StringReplacementBenchmark.regex_replaceAll                  thrpt    6  13795837.811 ± 3635087.691   ops/s
+ * StringReplacementBenchmark.regex_replaceAll:gc.alloc.rate    thrpt    6      3988.955 ±    1148.316  MB/sec
+ *
+ * StringReplacementBenchmark.string_replaceAll                 thrpt    6  14611046.391 ± 4865682.875   ops/s
+ * StringReplacementBenchmark.string_replaceAll:gc.alloc.rate   thrpt    6     11391.346 ±    3790.917  MB/sec
+ *
+ * StringReplacementBenchmark.strings_replaceAll                thrpt    6  39514695.575 ± 7169844.210   ops/s
+ * StringReplacementBenchmark.strings_replaceAll:gc.alloc.rate  thrpt    6      2777.083 ±     506.909  MB/sec
+ * </pre>
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+public class StringReplacementBenchmark {
+  /** Literal searched for by every variant. */
+  static final String NEEDLE = "foo";
+
+  /**
+   * Text substituted for each match. Shared by all three variants so they do the same work; the
+   * regex variant previously used a misspelled copy of this literal.
+   */
+  static final String REPLACEMENT = "*redacted*";
+
+  /** Sample inputs: some contain the needle once, some several times, some not at all. */
+  static final String[] INPUTS = {
+    "foo",
+    "baz",
+    "foobar",
+    "foobaz",
+    "foo=baz",
+    "bar=foo",
+    "foo=foo&bar=foo",
+    "lorem ipsum",
+    "datadog"
+  };
+
+  /**
+   * Cursor into {@link #INPUTS}. It is a plain static field shared by all benchmark threads (see
+   * {@code @Threads} above), so concurrent updates can race; {@link #nextInput()} range-checks its
+   * own local copy, which keeps the index it actually uses inside the array.
+   */
+  static int sharedInputIndex = 0;
+
+  /** Advances the shared cursor, wrapping to the first entry, and returns the selected input. */
+  static String nextInput() {
+    int localIndex = ++sharedInputIndex;
+    if (localIndex >= INPUTS.length) {
+      sharedInputIndex = localIndex = 0;
+    }
+    return INPUTS[localIndex];
+  }
+
+  @Benchmark
+  public String string_replaceAll() {
+    return _string_replaceAll(nextInput());
+  }
+
+  /** Variant 1: {@link String#replaceAll(String, String)}. */
+  static String _string_replaceAll(String input) {
+    // Underneath, this does Pattern.compile("foo").matcher(str).replaceAll()
+    return input.replaceAll(NEEDLE, REPLACEMENT);
+  }
+
+  /** Compiled once, so the regex variant does not compile the pattern on every call. */
+  static final Pattern REGEX_COMPILED = Pattern.compile(NEEDLE);
+
+  @Benchmark
+  public String regex_replaceAll() {
+    return _regex_replaceAll(nextInput());
+  }
+
+  /** Variant 2: a precompiled {@link Pattern} with {@code Matcher.replaceAll}. */
+  static String _regex_replaceAll(String input) {
+    return REGEX_COMPILED.matcher(input).replaceAll(REPLACEMENT);
+  }
+
+  @Benchmark
+  public String strings_replaceAll() {
+    return _strings_replaceAll(nextInput());
+  }
+
+  /** Variant 3: {@code Strings.replaceAll}, which takes the needle as a plain String. */
+  static String _strings_replaceAll(String input) {
+    return Strings.replaceAll(input, NEEDLE, REPLACEMENT);
+  }
+}
diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java
new file mode 100644
index 00000000000..1c20fe1cfec
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/StringSplitBenchmark.java
@@ -0,0 +1,92 @@
+package datadog.trace.util;
+
+import java.util.regex.Pattern;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Strings.split is generally faster for String processing, since it creates SubSequences that are
+ * views into the backing String rather than new String objects.
+ *
+ * <pre>
+ * Benchmark                           (testStr)   Mode  Cnt           Score           Error  Units
+ * StringSplitBenchmark.pattern_split      EMPTY  thrpt    6   291274421.621 ±  14834420.899  ops/s
+ * StringSplitBenchmark.string_split       EMPTY  thrpt    6  1035461179.368 ±  60212686.921  ops/s
+ * StringSplitBenchmark.strings_split      EMPTY  thrpt    6  8161781738.019 ± 178530888.497  ops/s
+ *
+ * StringSplitBenchmark.pattern_split    TRIVIAL  thrpt    6    83982270.075 ±  10250565.633  ops/s
+ * StringSplitBenchmark.string_split     TRIVIAL  thrpt    6   848615850.339 ±  42453569.634  ops/s
+ * StringSplitBenchmark.strings_split    TRIVIAL  thrpt    6  1765290890.948 ± 160053487.111  ops/s
+ *
+ * StringSplitBenchmark.pattern_split      SMALL  thrpt    6    27383819.756 ±   5454020.100  ops/s
+ * StringSplitBenchmark.string_split       SMALL  thrpt    6   149047480.037 ±   6124271.615  ops/s
+ * StringSplitBenchmark.strings_split      SMALL  thrpt    6   564058097.162 ±  49305418.971  ops/s
+ *
+ * StringSplitBenchmark.pattern_split     MEDIUM  thrpt    6    14879131.729 ±   1981850.920  ops/s
+ * StringSplitBenchmark.string_split      MEDIUM  thrpt    6    51237769.598 ±   1808521.138  ops/s
+ * StringSplitBenchmark.strings_split     MEDIUM  thrpt    6   176976970.705 ±   6813886.658  ops/s
+ *
+ * StringSplitBenchmark.pattern_split      LARGE  thrpt    6      482340.838 ±     24903.187  ops/s
+ * StringSplitBenchmark.string_split       LARGE  thrpt    6     2460212.879 ±     86911.652  ops/s
+ * StringSplitBenchmark.strings_split      LARGE  thrpt    6     4023658.103 ±     30305.699  ops/s
+ * </pre>
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+@State(Scope.Benchmark)
+public class StringSplitBenchmark {
+ // Inputs to split, from the empty string up to a string just under 4096 characters.
+ // Each constant carries its text in the str field.
+ public enum TestString {
+ EMPTY(""),
+ TRIVIAL("app_key=1111"),
+ SMALL("app_key=1111&foo=bar&baz=quux"),
+ MEDIUM(repeat("app_key=1111", '&', 100)),
+ LARGE(repeat("app_key=1111&application_key=2222&token=0894-4832", '&', 4096));
+
+ final String str;
+
+ TestString(String str) {
+ this.str = str;
+ }
+ };
+
+ // The input that every benchmark method below splits.
+ @Param TestString testStr;
+
+ // Builds copies of `repeat` joined by `separator`. The result always holds at least one copy;
+ // further copies are appended only while the result would stay shorter than `length`, so
+ // `length` is an upper bound on the size rather than a repetition count.
+ static final String repeat(String repeat, char separator, int length) {
+ StringBuilder builder = new StringBuilder(length);
+ builder.append(repeat);
+ while (builder.length() + repeat.length() + 1 < length) {
+ builder.append(separator).append(repeat);
+ }
+ return builder.toString();
+ }
+
+ // Variant 1: String.split with the separator given as a regex literal.
+ // Every resulting piece is handed to the Blackhole.
+ @Benchmark
+ public void string_split(Blackhole bh) {
+ for (String substr : this.testStr.str.split("\\&")) {
+ bh.consume(substr);
+ }
+ }
+
+ // Compiled once and reused by pattern_split, so that variant does not compile per call.
+ static final Pattern PATTERN = Pattern.compile("\\&");
+
+ // Variant 2: split through the precompiled PATTERN.
+ @Benchmark
+ public void pattern_split(Blackhole bh) {
+ for (String str : PATTERN.split(this.testStr.str)) {
+ bh.consume(str);
+ }
+ }
+
+ // Variant 3: Strings.split with a char separator; the pieces are SubSequence elements
+ // rather than Strings.
+ @Benchmark
+ public void strings_split(Blackhole bh) {
+ for (SubSequence subSeq : Strings.split(this.testStr.str, '&')) {
+ bh.consume(subSeq);
+ }
+ }
+}
diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java
new file mode 100644
index 00000000000..d24755e950b
--- /dev/null
+++ b/internal-api/src/jmh/java/datadog/trace/util/StringSubSequenceBenchmark.java
@@ -0,0 +1,64 @@
+package datadog.trace.util;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Strings.substring has 5x throughput. This is primarily achieved through less allocation.
+ *
+ * <p>NOTE: The higher allocation rate is misleading because 5x the work was performed. After
+ * accounting for the 5x throughput difference, the actual allocation rate is 0.25x that of
+ * String.substring or String.subSequence / SubSequence.of.
+ *
+ * <pre>
+ * Benchmark                                                    Mode  Cnt          Score          Error   Units
+ * StringSubSequenceBenchmark.string_subSequence                thrpt    6  140369998.493 ±  4387855.861   ops/s
+ * StringSubSequenceBenchmark.string_subSequence:gc.alloc.rate  thrpt    6      88880.463 ±     2778.032  MB/sec
+ *
+ * StringSubSequenceBenchmark.string_substring                  thrpt    6  136916708.207 ± 12299226.575   ops/s
+ * StringSubSequenceBenchmark.string_substring:gc.alloc.rate    thrpt    6      86689.852 ±     7777.642  MB/sec
+ *
+ * StringSubSequenceBenchmark.subSequence                       thrpt    6  679669385.260 ±  7194043.619   ops/s
+ * StringSubSequenceBenchmark.subSequence:gc.alloc.rate         thrpt    6     103702.745 ±     1095.741  MB/sec
+ * </pre>
+ */
+@Fork(2)
+@Warmup(iterations = 2)
+@Measurement(iterations = 3)
+@Threads(8)
+public class StringSubSequenceBenchmark {
+ // Text that every benchmark below slices into consecutive chunks of at most 100 characters.
+ static final String LOREM_IPSUM =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
+
+ // Variant 1: String.substring for each chunk; every chunk is handed to the Blackhole.
+ @Benchmark
+ public void string_substring(Blackhole bh) {
+ String str = LOREM_IPSUM;
+ int len = str.length();
+
+ // Math.min clamps the final chunk to the end of the text.
+ for (int i = 0; i < str.length(); i += 100) {
+ bh.consume(str.substring(i, Math.min(i + 100, len)));
+ }
+ }
+
+ // Variant 2: String.subSequence over the same chunk boundaries.
+ @Benchmark
+ public void string_subSequence(Blackhole bh) {
+ String str = LOREM_IPSUM;
+ int len = str.length();
+
+ for (int i = 0; i < str.length(); i += 100) {
+ bh.consume(str.subSequence(i, Math.min(i + 100, len)));
+ }
+ }
+
+ // Variant 3: SubSequence.of over the same chunk boundaries.
+ @Benchmark
+ public void subSequence(Blackhole bh) {
+ String str = LOREM_IPSUM;
+ int len = str.length();
+
+ for (int i = 0; i < str.length(); i += 100) {
+ bh.consume(SubSequence.of(str, i, Math.min(i + 100, len)));
+ }
+ }
+}
diff --git a/internal-api/src/main/java/datadog/trace/util/Strings.java b/internal-api/src/main/java/datadog/trace/util/Strings.java
index efca9430007..5a8b1997121 100644
--- a/internal-api/src/main/java/datadog/trace/util/Strings.java
+++ b/internal-api/src/main/java/datadog/trace/util/Strings.java
@@ -5,6 +5,9 @@
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
import java.util.concurrent.ThreadLocalRandom;
import javax.annotation.Nullable;
@@ -180,4 +183,143 @@ public static String coalesce(@Nullable final String first, @Nullable final Stri
return null;
}
}
+
+ /**
+  * Low overhead replaceAll: replaces every literal (non-regex) occurrence of {@code needle} in
+  * {@code input} with {@code replacement}.
+  *
+  * <p>Matches are located left to right and never overlap. When {@code needle} does not occur,
+  * the {@code input} instance itself is returned and no builder is allocated. An empty {@code
+  * needle} is delegated to {@link String#replace(CharSequence, CharSequence)}, because the scan
+  * below could never advance past a zero-length match.
+  *
+  * @param input the String to search; must not be null
+  * @param needle the literal text to look for; must not be null
+  * @param replacement the literal text substituted for each match
+  * @return {@code input} with every occurrence of {@code needle} replaced
+  */
+ public static String replaceAll(String input, String needle, String replacement) {
+   int needleLen = needle.length();
+   // A zero-length needle matches at every index, so indexOf would keep returning the same
+   // position and the loop would append forever. Let the JDK handle that case.
+   if (needleLen == 0) return input.replace(needle, replacement);
+
+   int index = input.indexOf(needle);
+   if (index == -1) return input;
+
+   StringBuilder builder = new StringBuilder(input.length() + 10);
+   // copyFrom marks the first character after the previous match (or the start of input).
+   int copyFrom = 0;
+   do {
+     builder.append(input, copyFrom, index).append(replacement);
+     copyFrom = index + needleLen;
+     index = input.indexOf(needle, copyFrom);
+   } while (index != -1);
+   // Tail after the last match.
+   builder.append(input, copyFrom, input.length());
+
+   return builder.toString();
+ }
+
+ /**
+  * Returns a {@link SubSequence} that is a view into {@code str}, running from {@code
+  * beginIndex} to the end of the String.
+  *
+  * <p>Unlike String.subSequence (which is usually just a wrapper around String.substring), this
+  * routine doesn't allocate a new String or byte[] / char[].
+  */
+ public static final SubSequence subSequence(String str, int beginIndex) {
+   int endIndex = str.length();
+   return new SubSequence(str, beginIndex, endIndex);
+ }
+
+ /**
+  * Returns a {@link SubSequence} that is a view into {@code str}, running from {@code
+  * beginIndex} to {@code endIndex}.
+  *
+  * <p>Unlike String.subSequence (which is usually just a wrapper around String.substring), this
+  * routine doesn't allocate a new String or byte[] / char[].
+  */
+ public static final SubSequence subSequence(String str, int beginIndex, int endIndex) {
+   SubSequence view = new SubSequence(str, beginIndex, endIndex);
+   return view;
+ }
+
+ /**
+ * Provides an IterablesplitChar
+ * . Unlike other approaches to splitting, this routine doesn't allocate any new
+ * String or byte[] / char[]
+ */
+ public static final IterableCharSequence that is view into a sub-sequencce of a String Unlike
+ * String.subSequence, this class doesn't allocate an additional String,
+ * char[], or byte[]
+ */
+public final class SubSequence implements CharSequence {
+  /** Shared zero-length instance backed by the empty String. */
+  public static final SubSequence EMPTY = new SubSequence("", 0, 0);
+
+  /**
+   * SubSequence from {@code startIndex} to the end of {@code str}; covers the same characters as
+   * {@code str.substring(startIndex)}.
+   */
+  public static final SubSequence of(String str, int startIndex) {
+    return new SubSequence(str, startIndex, str.length());
+  }
+
+  /**
+   * SubSequence from {@code startIndex} inclusive to {@code endIndex} exclusive of {@code str};
+   * covers the same characters as {@code str.substring(startIndex, endIndex)}.
+   */
+  public static final SubSequence of(String str, int startIndex, int endIndex) {
+    return new SubSequence(str, startIndex, endIndex);
+  }
+
+  // The backing String and the [beginIndex, endIndex) window into it.
+  private final String str;
+  private final int beginIndex;
+  private final int endIndex;
+
+  // Filled in by the first toString() call so later calls reuse the same String.
+  private String cachedSubstr = null;
+
+  /** Stores the window as given; the indices are not validated here. */
+  SubSequence(String str, int startIndex, int endIndex) {
+    this.str = str;
+    this.beginIndex = startIndex;
+    this.endIndex = endIndex;
+  }
+
+  /** Beginning index of the subsequence in the backing String - can be useful in text processing */
+  public int beginIndex() {
+    return this.beginIndex;
+  }
+
+  /** Ending index of the subsequence in the backing String - can be useful in text processing */
+  public int endIndex() {
+    return this.endIndex;
+  }
+
+  /** Returns the character at {@code index}, counted from the start of this subsequence. */
+  @Override
+  public char charAt(int index) {
+    return this.str.charAt(this.beginIndex + index);
+  }
+
+  @Override
+  public int length() {
+    return this.endIndex - this.beginIndex;
+  }
+
+  /**
+   * Returns a view of the characters from {@code start} (inclusive) to {@code end} (exclusive),
+   * both counted from the start of this subsequence as {@link CharSequence#subSequence(int, int)}
+   * specifies. The result shares the backing String; no characters are copied.
+   */
+  @Override
+  public SubSequence subSequence(int start, int end) {
+    // end is an index relative to this view, not a length, so it is offset by beginIndex only.
+    return new SubSequence(this.str, this.beginIndex + start, this.beginIndex + end);
+  }
+
+  /** Appends this SubSequence to the StringBuilder. Equivalent to builder.append(this) but faster */
+  public void appendTo(StringBuilder builder) {
+    int beginIndex = this.beginIndex;
+    int endIndex = this.endIndex;
+
+    // Guards against the special case empty SubSequence at this.str.length
+    if (beginIndex != endIndex) builder.append(this.str, beginIndex, endIndex);
+  }
+
+  /**
+   * Returns the hash code of the String form of this subsequence, i.e. {@code
+   * toString().hashCode()}; the first call therefore materializes and caches that String.
+   */
+  @Override
+  public int hashCode() {
+    return this.toString().hashCode();
+  }
+
+  /**
+   * Compares character by character against any CharSequence, so it also returns true for a
+   * String with the same content. Note that the reverse call, {@code String.equals(subSequence)},
+   * is false.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (!(obj instanceof CharSequence)) return false;
+
+    return this.equals((CharSequence) obj);
+  }
+
+  /** Returns true when {@code that} has the same length and the same characters in order. */
+  public final boolean equals(CharSequence that) {
+    int len = this.length();
+    if (len != that.length()) return false;
+
+    for (int i = 0; i < len; ++i) {
+      if (this.charAt(i) != that.charAt(i)) return false;
+    }
+    return true;
+  }
+
+  /** Returns the covered characters as a String; the result is cached after the first call. */
+  @Override
+  public String toString() {
+    String cached = this.cachedSubstr;
+    if (cached != null) return cached;
+
+    int beginIndex = this.beginIndex;
+    int endIndex = this.endIndex;
+
+    String substr = (beginIndex == endIndex) ? "" : this.str.substring(beginIndex, endIndex);
+    this.cachedSubstr = substr;
+    return substr;
+  }
+}
diff --git a/internal-api/src/test/java/datadog/trace/util/StringsTest2.java b/internal-api/src/test/java/datadog/trace/util/StringsTest2.java
new file mode 100644
index 00000000000..c949da41a5d
--- /dev/null
+++ b/internal-api/src/test/java/datadog/trace/util/StringsTest2.java
@@ -0,0 +1,116 @@
+package datadog.trace.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Iterator;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+public class StringsTest2 {
+ @Test
+ @DisplayName("replaceAll - no replace")
+ public void replaceAllNoReplace() {
+   // "dne" does not occur in the input, so the input must come back unchanged.
+   assertEquals("foobar", Strings.replaceAll("foobar", "dne", "unchanged"));
+ }
+
+ @Test
+ @DisplayName("replaceAll - single replace")
+ public void replaceAllSingleReplace() {
+   // The needle occurs exactly once, at the end of the input.
+   String replaced = Strings.replaceAll("foobar", "bar", "baz");
+   assertEquals("foobaz", replaced);
+ }
+
+ @Test
+ @DisplayName("replaceAll - multi replace")
+ public void replaceAllMultiReplace() {
+   // The needle occurs twice; both occurrences must be replaced and the text between kept.
+   assertEquals("foo=baz&quux=baz", Strings.replaceAll("foo=bar&quux=bar", "bar", "baz"));
+ }
+
+ @Test
+ @DisplayName("split - empty")
+ public void splitEmpty() {
+ Iterator