Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package datadog.trace.util;

import java.util.regex.Pattern;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

/**
* For simple replacements, Strings.replaceAll outperforms String.replaceAll and
* regex.Matcher.replaceAll by 3x. Strings.replaceAll also requires less allocation.
*
* <p>When pattern matching is needed, compiling the regex to Pattern slightly improves overhead,
* but dramatically reduces memory allocation to 1/4x of String.replaceAll. <code>
* MacBook M1 with 8 threads (Java 21)
*
* Benchmark Mode Cnt Score Error Units
* StringReplacementBenchmark.regex_replaceAll thrpt 6 13795837.811 ± 3635087.691 ops/s
* StringReplacementBenchmark.regex_replaceAll:gc.alloc.rate thrpt 6 3988.955 ± 1148.316 MB/sec
*
* StringReplacementBenchmark.string_replaceAll thrpt 6 14611046.391 ± 4865682.875 ops/s
* StringReplacementBenchmark.string_replaceAll:gc.alloc.rate thrpt 6 11391.346 ± 3790.917 MB/sec
*
* StringReplacementBenchmark.strings_replaceAll thrpt 6 39514695.575 ± 7169844.210 ops/s
* StringReplacementBenchmark.strings_replaceAll:gc.alloc.rate thrpt 6 2777.083 ± 506.909 MB/sec
* </code>
*/
@Fork(2)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@Threads(8)
public class StringReplacementBenchmark {
  /** Literal text searched for by every benchmark in this class. */
  static final String NEEDLE = "foo";

  /**
   * Text substituted for each match. All three benchmarks share this constant so that they
   * produce identical output and differ only in the replacement mechanism being measured.
   */
  static final String REPLACEMENT = "*redacted*";

  /** Inputs cycled through by the benchmarks; some contain {@link #NEEDLE} and some do not. */
  static final String[] INPUTS = {
    "foo",
    "baz",
    "foobar",
    "foobaz",
    "foo=baz",
    "bar=foo",
    "foo=foo&bar=foo",
    "lorem ipsum",
    "datadog"
  };

  /**
   * Cursor into {@link #INPUTS}. This is a plain static field that is updated without any
   * synchronization although the class is run with {@code @Threads(8)}, so concurrent updates
   * may be lost. {@link #nextInput()} bounds-checks its own local copy, so the array access
   * itself always stays in range.
   */
  static int sharedInputIndex = 0;

  /**
   * Returns the next input, wrapping back to the first element once the end of {@link #INPUTS}
   * is reached. Because the cursor is pre-incremented, the very first call returns {@code
   * INPUTS[1]}.
   */
  static String nextInput() {
    int localIndex = ++sharedInputIndex;
    if (localIndex >= INPUTS.length) {
      sharedInputIndex = localIndex = 0;
    }
    return INPUTS[localIndex];
  }

  /** Measures {@link String#replaceAll(String, String)}. */
  @Benchmark
  public String string_replaceAll() {
    return _string_replaceAll(nextInput());
  }

  static String _string_replaceAll(String input) {
    // Underneath, this does Pattern.compile("foo").matcher(str).replaceAll()
    return input.replaceAll(NEEDLE, REPLACEMENT);
  }

  /** Pattern compiled once up front so the benchmark excludes regex compilation. */
  static final Pattern REGEX_COMPILED = Pattern.compile(NEEDLE);

  /** Measures {@code Matcher.replaceAll} on the pre-compiled {@link #REGEX_COMPILED}. */
  @Benchmark
  public String regex_replaceAll() {
    return _regex_replaceAll(nextInput());
  }

  static String _regex_replaceAll(String input) {
    // Uses the shared REPLACEMENT; a misspelled literal here previously made this variant
    // produce different output from the other two.
    return REGEX_COMPILED.matcher(input).replaceAll(REPLACEMENT);
  }

  /** Measures {@code Strings.replaceAll}, the literal (non-regex) replacement helper. */
  @Benchmark
  public String strings_replaceAll() {
    return _strings_replaceAll(nextInput());
  }

  static String _strings_replaceAll(String input) {
    return Strings.replaceAll(input, NEEDLE, REPLACEMENT);
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package datadog.trace.util;

import java.util.regex.Pattern;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

/**
* Strings.split is generally faster for String processing, since it creates SubSequences that are
* views into the backing String rather than new String objects. <code>
* Benchmark (testStr) Mode Cnt Score Error Units
* StringSplitBenchmark.pattern_split EMPTY thrpt 6 291274421.621 ± 14834420.899 ops/s
* StringSplitBenchmark.string_split EMPTY thrpt 6 1035461179.368 ± 60212686.921 ops/s
* StringSplitBenchmark.strings_split EMPTY thrpt 6 8161781738.019 ± 178530888.497 ops/s
*
* StringSplitBenchmark.pattern_split TRIVIAL thrpt 6 83982270.075 ± 10250565.633 ops/s
* StringSplitBenchmark.string_split TRIVIAL thrpt 6 848615850.339 ± 42453569.634 ops/s
* StringSplitBenchmark.strings_split TRIVIAL thrpt 6 1765290890.948 ± 160053487.111 ops/s
*
* StringSplitBenchmark.pattern_split SMALL thrpt 6 27383819.756 ± 5454020.100 ops/s
* StringSplitBenchmark.string_split SMALL thrpt 6 149047480.037 ± 6124271.615 ops/s
* StringSplitBenchmark.strings_split SMALL thrpt 6 564058097.162 ± 49305418.971 ops/s
*
* StringSplitBenchmark.pattern_split MEDIUM thrpt 6 14879131.729 ± 1981850.920 ops/s
* StringSplitBenchmark.string_split MEDIUM thrpt 6 51237769.598 ± 1808521.138 ops/s
* StringSplitBenchmark.strings_split MEDIUM thrpt 6 176976970.705 ± 6813886.658 ops/s
*
* StringSplitBenchmark.pattern_split LARGE thrpt 6 482340.838 ± 24903.187 ops/s
* StringSplitBenchmark.string_split LARGE thrpt 6 2460212.879 ± 86911.652 ops/s
* StringSplitBenchmark.strings_split LARGE thrpt 6 4023658.103 ± 30305.699 ops/s
* </code>
*/
@Fork(2)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@Threads(8)
@State(Scope.Benchmark)
public class StringSplitBenchmark {
  /** Pre-compiled form of the same separator regex that {@code string_split} passes inline. */
  static final Pattern PATTERN = Pattern.compile("\\&");

  /** Query-string style inputs of increasing size, selected through the JMH parameter. */
  public enum TestString {
    EMPTY(""),
    TRIVIAL("app_key=1111"),
    SMALL("app_key=1111&foo=bar&baz=quux"),
    MEDIUM(repeat("app_key=1111", '&', 100)),
    LARGE(repeat("app_key=1111&application_key=2222&token=0894-4832", '&', 4096));

    final String str;

    TestString(String str) {
      this.str = str;
    }
  }

  /** Input under test for the current benchmark run. */
  @Param TestString testStr;

  /**
   * Builds a string of {@code repeat} copies joined by {@code separator}.
   *
   * <p>{@code length} is a character budget, not a repetition count: the result always starts
   * with one copy of {@code repeat}, and further {@code separator + repeat} pairs are appended
   * only while doing so keeps the total strictly below {@code length}.
   */
  static final String repeat(String repeat, char separator, int length) {
    final StringBuilder joined = new StringBuilder(length);
    joined.append(repeat);

    // Characters added per extra repetition: the separator plus one copy of the text.
    final int step = repeat.length() + 1;
    for (int used = joined.length(); used + step < length; used = joined.length()) {
      joined.append(separator).append(repeat);
    }
    return joined.toString();
  }

  /** Measures {@link String#split(String)} with the separator passed as an inline regex. */
  @Benchmark
  public void string_split(Blackhole bh) {
    final String[] parts = this.testStr.str.split("\\&");
    for (int i = 0; i < parts.length; i++) {
      bh.consume(parts[i]);
    }
  }

  /** Measures {@link Pattern#split(CharSequence)} on the pre-compiled {@link #PATTERN}. */
  @Benchmark
  public void pattern_split(Blackhole bh) {
    final String[] parts = PATTERN.split(this.testStr.str);
    for (int i = 0; i < parts.length; i++) {
      bh.consume(parts[i]);
    }
  }

  /** Measures {@code Strings.split}, which iterates {@code SubSequence} elements. */
  @Benchmark
  public void strings_split(Blackhole bh) {
    for (SubSequence piece : Strings.split(this.testStr.str, '&')) {
      bh.consume(piece);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package datadog.trace.util;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

/**
* SubSequence.of has roughly 5x the throughput of String.substring and String.subSequence. This is
* primarily achieved through less allocation.
*
* <p>NOTE: The higher allocation rate reported for SubSequence.of is misleading because 5x the work
* was performed. After accounting for the 5x throughput difference, its allocation per operation is
* roughly 0.25x that of String.substring or String.subSequence. <code>
* Benchmark Mode Cnt Score Error Units
* StringSubSequenceBenchmark.string_subSequence thrpt 6 140369998.493 ± 4387855.861 ops/s
* StringSubSequenceBenchmark.string_subSequence:gc.alloc.rate thrpt 6 88880.463 ± 2778.032 MB/sec
*
* StringSubSequenceBenchmark.string_substring thrpt 6 136916708.207 ± 12299226.575 ops/s
* StringSubSequenceBenchmark.string_substring:gc.alloc.rate thrpt 6 86689.852 ± 7777.642 MB/sec
*
* StringSubSequenceBenchmark.subSequence thrpt 6 679669385.260 ± 7194043.619 ops/s
* StringSubSequenceBenchmark.subSequence:gc.alloc.rate thrpt 6 103702.745 ± 1095.741 MB/sec
* </code>
*/
@Fork(2)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@Threads(8)
public class StringSubSequenceBenchmark {
  /** Number of characters taken per slice; the final slice may be shorter. */
  private static final int CHUNK = 100;

  /** Text that every benchmark cuts into consecutive {@link #CHUNK}-character slices. */
  static final String LOREM_IPSUM =
      "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";

  /** Slices the text with {@link String#substring(int, int)}. */
  @Benchmark
  public void string_substring(Blackhole bh) {
    final String text = LOREM_IPSUM;
    final int len = text.length();

    for (int begin = 0; begin < len; begin += CHUNK) {
      final int end = Math.min(begin + CHUNK, len);
      bh.consume(text.substring(begin, end));
    }
  }

  /** Slices the text with {@link String#subSequence(int, int)}. */
  @Benchmark
  public void string_subSequence(Blackhole bh) {
    final String text = LOREM_IPSUM;
    final int len = text.length();

    for (int begin = 0; begin < len; begin += CHUNK) {
      final int end = Math.min(begin + CHUNK, len);
      bh.consume(text.subSequence(begin, end));
    }
  }

  /** Slices the text with {@code SubSequence.of}. */
  @Benchmark
  public void subSequence(Blackhole bh) {
    final String text = LOREM_IPSUM;
    final int len = text.length();

    for (int begin = 0; begin < len; begin += CHUNK) {
      final int end = Math.min(begin + CHUNK, len);
      bh.consume(SubSequence.of(text, begin, end));
    }
  }
}
142 changes: 142 additions & 0 deletions internal-api/src/main/java/datadog/trace/util/Strings.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.concurrent.ThreadLocalRandom;
import javax.annotation.Nullable;

Expand Down Expand Up @@ -180,4 +183,143 @@ public static String coalesce(@Nullable final String first, @Nullable final Stri
return null;
}
}

/**
 * Low overhead replacement of every literal occurrence of {@code needle} in {@code input}.
 *
 * <p>Unlike {@link String#replaceAll(String, String)}, {@code needle} is matched literally
 * rather than as a regular expression, and {@code replacement} is inserted verbatim. Matches are
 * found left to right and never overlap.
 *
 * <p>An empty {@code needle} is delegated to {@link String#replace(CharSequence, CharSequence)},
 * which inserts {@code replacement} before every character and at the end of {@code input}.
 *
 * @param input the string to search
 * @param needle the literal text to look for
 * @param replacement the text inserted in place of each match
 * @return {@code input} itself when {@code needle} is non-empty and does not occur; otherwise a
 *     new string with every match replaced
 * @throws NullPointerException if {@code input} or {@code needle} is {@code null}
 */
public static String replaceAll(String input, String needle, String replacement) {
  if (needle.isEmpty()) {
    // An empty needle matches at every index without consuming any input, so the scan below
    // would never advance and would append to the builder forever. Defer to the JDK instead.
    return input.replace(needle, replacement);
  }

  int index = input.indexOf(needle);
  if (index == -1) {
    // Fast path: nothing to replace, so no builder is allocated.
    return input;
  }

  int needleLen = needle.length();
  StringBuilder builder = new StringBuilder(input.length() + 10);

  // Start of the not-yet-copied tail of input, i.e. just past the previous match.
  int copyFrom = 0;
  do {
    builder.append(input, copyFrom, index).append(replacement);
    copyFrom = index + needleLen;
    index = input.indexOf(needle, copyFrom);
  } while (index != -1);

  // Whatever follows the last match is copied unchanged.
  builder.append(input, copyFrom, input.length());

  return builder.toString();
}

/**
 * Provides a {@code SubSequence} that is a view into the provided String, running from {@code
 * beginIndex} to the end of the String.
 *
 * <p>Unlike {@code String.subSequence} (which is usually just a wrapper around {@code
 * String.substring}), this routine doesn't allocate a new {@code String} or {@code byte[]} /
 * {@code char[]}.
 *
 * @param str the backing String
 * @param beginIndex index at which the view starts
 * @return a view of {@code str} from {@code beginIndex} up to {@code str.length()}
 */
public static final SubSequence subSequence(String str, int beginIndex) {
  final int endIndex = str.length();
  return new SubSequence(str, beginIndex, endIndex);
}

/**
 * Provides a {@code SubSequence} that is a view into the provided String, bounded by {@code
 * beginIndex} and {@code endIndex}.
 *
 * <p>Unlike {@code String.subSequence} (which is usually just a wrapper around {@code
 * String.substring}), this routine doesn't allocate a new {@code String} or {@code byte[]} /
 * {@code char[]}.
 *
 * @param str the backing String
 * @param beginIndex index at which the view starts
 * @param endIndex index at which the view ends
 * @return a view of {@code str} over the given bounds
 */
public static final SubSequence subSequence(String str, int beginIndex, int endIndex) {
  final SubSequence view = new SubSequence(str, beginIndex, endIndex);
  return view;
}

/**
 * Provides an {@code Iterable<SubSequence>} whose elements are the parts of {@code str}
 * separated by {@code splitChar}. Unlike other approaches to splitting, this routine doesn't
 * allocate any new {@code String} or {@code byte[]} / {@code char[]}.
 *
 * <p>An empty {@code str} yields an empty Iterable, and a {@code str} that does not contain
 * {@code splitChar} yields a single element covering the whole String.
 *
 * @param str the String to split
 * @param splitChar the separator character
 * @return the sub-sequences of {@code str} between separators
 */
public static final Iterable<SubSequence> split(String str, char splitChar) {
  if (str.isEmpty()) {
    return Collections.emptyList();
  }

  final int separatorAt = str.indexOf(splitChar);
  if (separatorAt >= 0) {
    // The first separator position is handed over so the iterator need not search for it again.
    return new SplitIterable(str, splitChar, separatorAt);
  }

  // No separator at all: the whole String is the only element.
  return Collections.singletonList(subSequence(str, 0));
}

/**
 * Iterable over the parts of a String separated by a single character. It only records the
 * String, its length, the separator and the index of the first separator; each call to {@link
 * #iterator()} creates a fresh {@link SplitIterator} from those values.
 */
static final class SplitIterable implements Iterable<SubSequence> {
  private final String str;
  private final char splitChar;
  private final int firstIndex;
  private final int len;

  /**
   * @param str the String being split
   * @param splitChar the separator character
   * @param firstIndex index of the first separator in {@code str}
   */
  SplitIterable(String str, char splitChar, int firstIndex) {
    this.str = str;
    this.splitChar = splitChar;
    this.firstIndex = firstIndex;
    this.len = str.length();
  }

  /** Returns a new, independent iterator positioned at the start of the String. */
  @Override
  public SplitIterator iterator() {
    return new SplitIterator(str, len, splitChar, firstIndex);
  }
}

/**
 * Iterator that walks a String and returns one SubSequence per part between occurrences of
 * splitChar. Each element is created with the backing String plus begin/end offsets.
 *
 * <p>State: curIndex is the begin offset of the next element to return, and nextIndex is the
 * index of the next separator (or len when no further separator exists). curIndex == len + 1
 * marks the iterator as exhausted.
 */
static final class SplitIterator implements Iterator<SubSequence> {
private final String str;
private final int len;
private final char splitChar;

// begin offset of the next element; len + 1 once iteration is finished
private int curIndex;
// index of the next separator, or len when there is none left
private int nextIndex;

/**
 * @param str the String being split
 * @param len length used as the end bound for iteration
 * @param splitChar the separator character
 * @param firstIndex index of the first separator, or -1 when there is none (treated as len)
 */
SplitIterator(String str, int len, char splitChar, int firstIndex) {
this.str = str;
this.len = len;
this.splitChar = splitChar;

this.curIndex = 0;
this.nextIndex = firstIndex == -1 ? len : firstIndex;
}

/**
 * Uses {@code <=} rather than {@code <} because curIndex == len is still a valid position: it
 * produces the empty element that follows a trailing separator.
 */
@Override
public boolean hasNext() {
return (this.curIndex <= this.len);
}

/**
 * Returns the next sub-sequence and advances the iterator.
 *
 * @throws NoSuchElementException if the iterator is already exhausted
 */
@Override
public SubSequence next() {
// Work on local copies of the fields; the fields are written back in each branch below.
int curIndex = this.curIndex;
int len = this.len;

if (curIndex > len) throw new NoSuchElementException();

SubSequence subSeq;

int nextIndex = this.nextIndex;
if (nextIndex == len - 1) {
// Handles the case where there's a trailing separator,
// curIndex is moved to len to represent the empty string
// after the trailing separator

// Next call then goes into the special case below
subSeq = new SubSequence(this.str, curIndex, nextIndex);
this.curIndex = len;
this.nextIndex = len;
} else if (curIndex == len) {
// Handles the empty string after the trailing separator
// curIndex is given the terminating value `len + 1`

// Don't use SubSequence.EMPTY because it wouldn't have
// the correct beginIndex
subSeq = new SubSequence(this.str, len, len);
this.curIndex = len + 1;
} else {
subSeq = new SubSequence(this.str, curIndex, nextIndex);

// core advancing logic
// Step past the separator; when nextIndex was already len (last element), this sets
// curIndex to len + 1, which ends the iteration.
this.curIndex = nextIndex + 1;
int searchIndex = this.str.indexOf(this.splitChar, nextIndex + 1);
this.nextIndex = (searchIndex == -1) ? len : searchIndex;
}

return subSeq;
}
}
}
Loading
Loading