Skip to content

Commit

Permalink
Makes entry splitter work with CharSequence (openzipkin#1284)
Browse files Browse the repository at this point in the history
This allows us to split and parse CharSequence instead of String, which
reduces allocations when parsing W3C tracestate.

This adds a utility, `CharSequences`, to help with common `CharSequence` tasks such as matching a region or skipping a sub-sequence.
  • Loading branch information
adriancole committed Dec 24, 2020
1 parent fa2ee90 commit d47e861
Show file tree
Hide file tree
Showing 11 changed files with 694 additions and 177 deletions.
163 changes: 163 additions & 0 deletions brave/src/main/java/brave/internal/codec/CharSequences.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* Copyright 2013-2020 The OpenZipkin Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package brave.internal.codec;

/**
* Most of our parsing tools accept {@link CharSequence} instead of {@link String} to avoid
* unnecessary allocation. This contains common functions available on {@link String}, preferring
* signatures that match our utilities such as {@link EntrySplitter}.
*/
public final class CharSequences {
  /**
   * Returns true if the input range contains only the expected characters, i.e. the region
   * {@code [beginIndex, endIndex)} of {@code input} has the same length and the same characters
   * as {@code expected}.
   *
   * @param expected characters to search for in the input
   * @param input charSequence to search for {@code expected}
   * @param beginIndex begin index of the {@code input}, inclusive
   * @param endIndex end index of the {@code input}, exclusive
   * @return true if we reached the {@code endIndex} without failures.
   * @throws NullPointerException if {@code expected} or {@code input} is null
   * @throws IndexOutOfBoundsException if the indexes are negative, inverted, or {@code endIndex}
   * is past the end of {@code input}
   * @see String#regionMatches(int, String, int, int) similar, except for {@link String}
   */
  public static boolean regionMatches(
      CharSequence expected, CharSequence input, int beginIndex, int endIndex) {
    if (expected == null) throw new NullPointerException("expected == null");
    if (input == null) throw new NullPointerException("input == null");
    int regionLength = regionLength(input.length(), beginIndex, endIndex);
    // The lengths must be equal, not merely "expected no longer than the region". Otherwise, an
    // expected value that is a strict prefix of the region would make the loop below read past
    // the end of expected and throw instead of returning false.
    if (expected.length() != regionLength) return false;
    for (int i = 0, inputIndex = beginIndex; i < regionLength; i++, inputIndex++) {
      if (expected.charAt(i) != input.charAt(inputIndex)) return false;
    }
    return true;
  }

  /**
   * Opposite of {@link CharSequence#subSequence(int, int)}: returns a view of {@code input} that
   * skips the characters in {@code [beginIndex, endIndex)}.
   *
   * @param input charSequence to remove a region from
   * @param beginIndex begin index of the region to skip, inclusive
   * @param endIndex end index of the region to skip, exclusive
   * @return {@code input} itself when the region is empty, {@code ""} when the region is the
   * entire input, otherwise a view over {@code input} that excludes the region.
   * @throws NullPointerException if {@code input} is null
   * @throws IndexOutOfBoundsException if the indexes are negative, inverted, or {@code endIndex}
   * is past the end of {@code input}
   */
  public static CharSequence withoutSubSequence(CharSequence input, int beginIndex, int endIndex) {
    if (input == null) throw new NullPointerException("input == null");
    int length = input.length();

    // Exit early if the region is empty or the entire input
    int skippedRegionLength = regionLength(length, beginIndex, endIndex);
    if (skippedRegionLength == 0) return input;
    if (beginIndex == 0 && endIndex == length) return "";

    // Exit early if the region ends on a boundary.
    // This doesn't use input.subsequence as it might allocate a String
    if (beginIndex == 0) return new SubSequence(input, endIndex, length);
    if (endIndex == length) return new SubSequence(input, 0, beginIndex);

    // Otherwise, the region to skip is in the middle
    return new WithoutSubSequence(input, 0, beginIndex, endIndex, length);
  }

  /**
   * Validates a region against the length of its input and returns the region's length.
   *
   * @param inputLength length of the character sequence the region applies to
   * @param beginIndex begin index of the region, inclusive
   * @param endIndex end index of the region, exclusive
   * @return {@code endIndex - beginIndex}
   * @throws IndexOutOfBoundsException if either index is negative, {@code beginIndex} is greater
   * than {@code endIndex}, or {@code endIndex} is greater than {@code inputLength}
   */
  static int regionLength(int inputLength, int beginIndex, int endIndex) {
    if (beginIndex < 0) throw new IndexOutOfBoundsException("beginIndex < 0");
    if (endIndex < 0) throw new IndexOutOfBoundsException("endIndex < 0");
    if (beginIndex > endIndex) throw new IndexOutOfBoundsException("beginIndex > endIndex");
    int regionLength = endIndex - beginIndex;
    if (endIndex > inputLength) throw new IndexOutOfBoundsException("endIndex > input");
    return regionLength;
  }

  /**
   * View over the range {@code [begin, end)} of another character sequence.
   *
   * <p>Avoids implicit string allocation when the input calls {@link String#subSequence(int,
   * int)}
   */
  static final class SubSequence implements CharSequence {
    final CharSequence input;
    // begin and end are positions in input; length is the number of characters in this view.
    final int begin, end, length;

    SubSequence(CharSequence input, int begin, int end) {
      this.input = input;
      this.begin = begin;
      this.end = end;
      this.length = end - begin;
    }

    @Override public int length() {
      return length;
    }

    @Override public char charAt(int index) {
      if (index < 0) throw new IndexOutOfBoundsException("index < 0");
      if (index >= length) throw new IndexOutOfBoundsException("index >= length");
      return input.charAt(begin + index);
    }

    @Override public CharSequence subSequence(int beginIndex, int endIndex) {
      int newLength = regionLength(length, beginIndex, endIndex);
      if (newLength == 0) return "";
      if (newLength == length) return this;
      // Indexes are relative to this view, so shift them to positions in the underlying input.
      return new SubSequence(input, beginIndex + begin, endIndex + begin);
    }

    @Override public String toString() {
      return new StringBuilder(length).append(input, begin, end).toString();
    }
  }

  /**
   * View over the range {@code [begin, end)} of another character sequence, excluding the
   * characters in {@code [beginSkip, endSkip)}.
   */
  static final class WithoutSubSequence implements CharSequence {
    final CharSequence input;
    // All positions are in terms of input. length is the number of characters in this view,
    // which is the outer range minus the skipped region.
    final int begin, beginSkip, endSkip, end, skipLength, length;

    WithoutSubSequence(
        CharSequence input, int begin, int beginSkip, int endSkip, int end) {
      this.input = input;
      this.begin = begin;
      this.beginSkip = beginSkip;
      this.endSkip = endSkip;
      this.end = end;
      this.skipLength = endSkip - beginSkip;
      this.length = end - begin - skipLength;
    }

    @Override public int length() {
      return length;
    }

    @Override public char charAt(int index) {
      if (index < 0) throw new IndexOutOfBoundsException("index < 0");
      if (index >= length) throw new IndexOutOfBoundsException("index >= length");
      index += begin;
      // Positions at or after the start of the skipped region jump past it.
      if (index >= beginSkip) index += skipLength;
      return input.charAt(index);
    }

    @Override public CharSequence subSequence(int beginIndex, int endIndex) {
      int newLength = regionLength(length, beginIndex, endIndex);
      if (newLength == 0) return "";
      if (newLength == length) return this;

      // Move the input positions to the relative offset
      beginIndex += begin;
      endIndex += begin;

      // Check to see if we are before the skipped region
      if (endIndex <= beginSkip) return new SubSequence(input, beginIndex, endIndex);

      // We now know we either include the skipped region or start after it
      endIndex += skipLength;

      // If we are after the skipped region, return a subsequence
      if (beginIndex >= beginSkip) return new SubSequence(input, beginIndex + skipLength, endIndex);

      // We happened to require both sides of the skipped region, so narrow it according to inputs.
      return new WithoutSubSequence(input, beginIndex, beginSkip, endSkip, endIndex);
    }

    @Override public String toString() {
      // Careful here to use .append(input, begin, end), not .append(input.subsequence(begin, end))
      // The latter can allocate temporary strings, subverting the purpose of using StringBuilder!
      return new StringBuilder(length)
          .append(input, begin, beginSkip)
          .append(input, endSkip, end).toString();
    }
  }
}
48 changes: 24 additions & 24 deletions brave/src/main/java/brave/internal/codec/EntrySplitter.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
import brave.internal.Platform;

/**
* Splits a character sequence that's in a delimited string, optionally trimming optional whitespace
* (<a href="https://httpwg.org/specs/rfc7230.html#rfc.section.3.2">OWS</a>) before or after
* Splits a delimited character sequence, optionally trimming optional whitespace (<a
* href="https://httpwg.org/specs/rfc7230.html#rfc.section.3.2">OWS</a>) before or after
* delimiters.
*
* <p>This is intended to be initialized as a constant, as doing so per-request will add
Expand All @@ -36,8 +36,8 @@ public static final class Builder {

/**
* When set, {@link Handler} will be called maximum {@code maxEntries} times per parse. After
* that, {@link #parse(Handler, Object, String)} returns false or throws an exception, based on
* {@link #shouldThrow(boolean)}. Default: {@link Integer#MAX_VALUE}.
* that, {@link #parse(Handler, Object, CharSequence)} returns false or throws an exception,
* based on {@link #shouldThrow(boolean)}. Default: {@link Integer#MAX_VALUE}.
*
* <p>This is used to implement strict format constraints. For example, above 32 entries is
* malformed. This is separate from any capacity constraints of the {@link Handler}, which may
Expand Down Expand Up @@ -99,7 +99,7 @@ public Builder trimOWSAroundEntrySeparator(boolean trimOWSAroundEntrySeparator)
* are removed around the {@link #keyValueSeparator(char)}. Default: {@code true}
*
* <p>For example, given the input " k1 = v1 , k2 = v2 ", this trims around the
* "=" character and string boundaries: {@code [(" k1", "v1 "),(" k2", "v2 ")]}.
* "=" character and charSequence boundaries: {@code [(" k1", "v1 "),(" k2", "v2 ")]}.
*
* @see #trimOWSAroundKeyValueSeparator(boolean)
*/
Expand All @@ -110,7 +110,7 @@ public Builder trimOWSAroundKeyValueSeparator(boolean trimOWSAroundKeyValueSepar

/**
* When {@code true}, when a {@link #keyValueSeparator(char)} does not follow a key, {@link
* #parse(Handler, Object, String)} returns false or throws an exception, based on {@link
* #parse(Handler, Object, CharSequence)} returns false or throws an exception, based on {@link
* #shouldThrow(boolean)}. Default: {@code true}.
*
* <p>Setting this to {@code false} makes "k1,k2=v2" interpreted the same as if there was
Expand Down Expand Up @@ -151,20 +151,20 @@ public interface Handler<T> {
*
* <p>After validating, typically strings will be parsed from the input like so:
* <pre>{@code
* String key = input.substring(beginKey, endKey);
* String value = input.substring(beginValue, endValue);
* String key = input.subSequence(beginKey, endKey).toString();
* String value = input.subSequence(beginValue, endValue).toString();
* }</pre>
*
* @param target receiver of parsed entries
* @param input string including data to parse
* @param beginKey begin index of the entry's key in {@code input}, inclusive
* @param endKey end index of the entry's key in {@code input}, exclusive
* @param target receiver of parsed entries
* @param input character sequence at least as large as the index parameters
* @param beginKey begin index of the entry's key in {@code input}, inclusive
* @param endKey end index of the entry's key in {@code input}, exclusive
* @param beginValue begin index of the entry's value in {@code input}, inclusive
* @param endValue end index of the entry's value in {@code input}, exclusive
* @param endValue end index of the entry's value in {@code input}, exclusive
* @return true if we reached the {@code endIndex} without failures.
*/
boolean onEntry(
T target, String input, int beginKey, int endKey, int beginValue, int endValue);
T target, CharSequence input, int beginKey, int endKey, int beginValue, int endValue);
}

final char keyValueSeparator, entrySeparator;
Expand All @@ -183,31 +183,31 @@ boolean onEntry(
shouldThrow = builder.shouldThrow;
missingKey = "Invalid input: no key before '" + keyValueSeparator + "'";
missingKeyValueSeparator =
"Invalid input: missing key value separator '" + keyValueSeparator + "'";
"Invalid input: missing key value separator '" + keyValueSeparator + "'";
overMaxEntries = "Invalid input: over " + maxEntries + " entries";
}

/**
* @param handler parses entries emitted upon success
* @param target receiver of parsed entries
* @param input string including data to parse
* @param target receiver of parsed entries
* @param input character sequence at least as large as the index parameters
* @return true if we reached the {@code endIndex} without failures.
*/
public <T> boolean parse(Handler<T> handler, T target, String input) {
public <T> boolean parse(Handler<T> handler, T target, CharSequence input) {
if (input == null) throw new NullPointerException("input == null");
return parse(handler, target, input, 0, input.length());
}

/**
* @param handler parses entries emitted upon success
* @param target receiver of parsed entries
* @param input string including data to parse
* @param handler parses entries emitted upon success
* @param target receiver of parsed entries
* @param input character sequence at least as large as the index parameters
* @param beginIndex begin index of the {@code input}, inclusive
* @param endIndex end index of the {@code input}, exclusive
* @param endIndex end index of the {@code input}, exclusive
* @return true if we reached the {@code endIndex} without failures.
*/
public <T> boolean parse(
Handler<T> handler, T target, String input, int beginIndex, int endIndex) {
Handler<T> handler, T target, CharSequence input, int beginIndex, int endIndex) {
if (handler == null) throw new NullPointerException("handler == null");
if (target == null) throw new NullPointerException("target == null");
if (input == null) throw new NullPointerException("input == null");
Expand Down Expand Up @@ -291,7 +291,7 @@ public <T> boolean parse(
return true;
}

static int rewindOWS(String input, int beginIndex, int endIndex) {
static int rewindOWS(CharSequence input, int beginIndex, int endIndex) {
// endIndex is a boundary. we need to begin looking one character before it.
while (isOWS(input.charAt(endIndex - 1))) {
if (--endIndex == beginIndex) return beginIndex; // trim whitespace
Expand Down
2 changes: 1 addition & 1 deletion brave/src/main/java/brave/internal/codec/IpLiteral.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public final class IpLiteral {
return ip;
}

// All the below code is from zipkin2.Endpoint, copy/pasted here to prevent a depedency.
// All the below code is from zipkin2.Endpoint, copy/pasted here to prevent a dependency.
public enum IpFamily {
Unknown,
IPv4,
Expand Down
2 changes: 1 addition & 1 deletion brave/src/main/java/brave/internal/codec/WriteBuffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ final int pos() {
return pos;
}

public void writeAscii(String v) {
public void writeAscii(CharSequence v) {
for (int i = 0, length = v.length(); i < length; i++) {
writeByte(v.charAt(i) & 0xff);
}
Expand Down
10 changes: 6 additions & 4 deletions brave/src/main/java/brave/internal/extra/Extra.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
/**
* Holds extended state in {@link TraceContext#extra()} or {@link TraceContextOrSamplingFlags#extra()}.
*
* <p>The implementation of this type uses copy-on-write semantics to prevent changes in a
* child context from affecting its parent.
* <p>Implementations copy-on-write when changing {@linkplain #state} to prevent a child context
* from affecting its parent.
*
* @param <E> Use a final type as otherwise tools like {@link TraceContext#findExtra(Class)} will
* not work. In most cases, the type should be package private.
* not work. In most cases, the type should be package private.
* @param <F> The factory that {@link ExtraFactory#create() creates} this instance.
*/
// We handle dynamic vs fixed state internally as it..
Expand Down Expand Up @@ -66,7 +66,9 @@ protected Extra(F factory) {
* <p>Ex 1: If state is a map, and ours includes {@code A -> 1, B -> 2} and theirs
* includes {@code A -> 2, D -> 1}, create a new state of {@code A -> 1, B -> 2, D -> 1}.
*
* <p><em>Note</em>: This operation does not need to {@linkplain #lock lock}.
* <p><em>Note</em>: This operation does not need to {@linkplain #lock lock} as long as changes
* happen before updating {@link #state}. See <a href="https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/package-summary.html#MemoryVisibility">MemoryVisibility</a>
* for details on "happens before" and volatile fields.
*/
protected abstract void mergeStateKeepingOursOnConflict(E that);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ static BaggageCodec get() {
return ENTRY_SPLITTER.parse(this, valueUpdater, value);
}

@Override public boolean onEntry(
ValueUpdater target, String buffer, int beginKey, int endKey, int beginValue, int endValue) {
BaggageField field = BaggageField.create(buffer.substring(beginKey, endKey));
String value = buffer.substring(beginValue, endValue);
@Override public boolean onEntry(ValueUpdater target,
CharSequence buffer, int beginKey, int endKey, int beginValue, int endValue) {
BaggageField field = BaggageField.create(buffer.subSequence(beginKey, endKey).toString());
String value = buffer.subSequence(beginValue, endValue).toString();
return target.updateValue(field, value);
}

Expand Down
Loading

0 comments on commit d47e861

Please sign in to comment.