Makes entry splitter work with CharSequence (openzipkin#1284)

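This allows us to split and parse a CharSequence instead of a String, which reduces allocations when parsing W3C tracestate. This also adds a utility, `CharSequences`, with helpers for common tasks such as matching a region of a CharSequence and viewing a CharSequence without one of its sub-sequences.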
OSSresearch-SuperTeam · Dec 24, 2020 · d47e861 · d47e861
1 parent fa2ee90
commit d47e861
Show file tree

Hide file tree

Showing 11 changed files with 694 additions and 177 deletions.
diff --git a/brave/src/main/java/brave/internal/codec/CharSequences.java b/brave/src/main/java/brave/internal/codec/CharSequences.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2013-2020 The OpenZipkin Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package brave.internal.codec;
+
+/**
+ * Most of our parsing tools accept {@link CharSequence} instead of {@link String} to avoid
+ * unnecessary allocation. This contains common functions available on {@link String}, preferring
+ * signatures that match our utilities such as {@link EntrySplitter}.
+ */
+public final class CharSequences {
+  /**
+   * Returns true if the input range contains exactly the expected characters, in order.
+   *
+   * <p>The range must have the same length as {@code expected} to match. A shorter or longer
+   * range returns false; it never results in an exception reading past {@code expected}.
+   *
+   * @param expected   characters to search for in the input
+   * @param input      charSequence to search for {@code expected}
+   * @param beginIndex begin index of the {@code input}, inclusive
+   * @param endIndex   end index of the {@code input}, exclusive
+   * @return true if every character in the range equals the corresponding character of {@code
+   * expected}, and the range and {@code expected} have the same length.
+   * @throws NullPointerException      if {@code expected} or {@code input} are null
+   * @throws IndexOutOfBoundsException if the indexes are negative, out of order, or {@code
+   *                                   endIndex} exceeds the length of the {@code input}
+   * @see String#regionMatches(int, String, int, int) similar, except for {@link String}
+   */
+  public static boolean regionMatches(
+    CharSequence expected, CharSequence input, int beginIndex, int endIndex) {
+    if (expected == null) throw new NullPointerException("expected == null");
+    if (input == null) throw new NullPointerException("input == null");
+    int regionLength = regionLength(input.length(), beginIndex, endIndex);
+    // Lengths must be equal: comparing only "expected > region" would let a shorter expected
+    // value be indexed past its end by the loop below.
+    if (expected.length() != regionLength) return false;
+    for (int i = 0, inputIndex = beginIndex; i < regionLength; i++, inputIndex++) {
+      if (expected.charAt(i) != input.charAt(inputIndex)) return false;
+    }
+    return true;
+  }
+
+  /**
+   * Opposite of {@link CharSequence#subSequence(int, int)}: returns a view of the {@code input}
+   * that excludes the characters between {@code beginIndex} and {@code endIndex}.
+   *
+   * @param input      charSequence to exclude a region from
+   * @param beginIndex begin index of the region to skip, inclusive
+   * @param endIndex   end index of the region to skip, exclusive
+   * @return the {@code input} itself when the region is empty, an empty string when the region
+   * is the entire input, otherwise a view over the remaining characters.
+   * @throws NullPointerException      if {@code input} is null
+   * @throws IndexOutOfBoundsException if the indexes are negative, out of order, or {@code
+   *                                   endIndex} exceeds the length of the {@code input}
+   */
+  public static CharSequence withoutSubSequence(CharSequence input, int beginIndex, int endIndex) {
+    if (input == null) throw new NullPointerException("input == null");
+    int length = input.length();
+
+    // Exit early if the region is empty or the entire input
+    int skippedRegionLength = regionLength(length, beginIndex, endIndex);
+    if (skippedRegionLength == 0) return input;
+    if (beginIndex == 0 && endIndex == length) return "";
+
+    // Exit early if the region ends on a boundary.
+    // This doesn't use input.subsequence as it might allocate a String
+    if (beginIndex == 0) return new SubSequence(input, endIndex, length);
+    if (endIndex == length) return new SubSequence(input, 0, beginIndex);
+
+    // Otherwise, the region to skip in the middle
+    return new WithoutSubSequence(input, 0, beginIndex, endIndex, length);
+  }
+
+  /**
+   * Validates the indexes against {@code inputLength} and returns the length of the region.
+   *
+   * @param inputLength length of the sequence the indexes refer to
+   * @param beginIndex  begin index of the region, inclusive
+   * @param endIndex    end index of the region, exclusive
+   * @return {@code endIndex - beginIndex}
+   * @throws IndexOutOfBoundsException if an index is negative, {@code beginIndex > endIndex}, or
+   *                                   {@code endIndex > inputLength}
+   */
+  static int regionLength(int inputLength, int beginIndex, int endIndex) {
+    if (beginIndex < 0) throw new IndexOutOfBoundsException("beginIndex < 0");
+    if (endIndex < 0) throw new IndexOutOfBoundsException("endIndex < 0");
+    if (beginIndex > endIndex) throw new IndexOutOfBoundsException("beginIndex > endIndex");
+    int regionLength = endIndex - beginIndex;
+    if (endIndex > inputLength) throw new IndexOutOfBoundsException("endIndex > input");
+    return regionLength;
+  }
+
+  /**
+   * View over the range {@code [begin, end)} of the {@code input}.
+   *
+   * <p>Avoids implicit string allocation when the input calls {@link String#subSequence(int,
+   * int)}
+   */
+  static final class SubSequence implements CharSequence {
+    final CharSequence input;
+    // begin (inclusive) and end (exclusive) are offsets into input; length is end - begin
+    final int begin, end, length;
+
+    SubSequence(CharSequence input, int begin, int end) {
+      this.input = input;
+      this.begin = begin;
+      this.end = end;
+      this.length = end - begin;
+    }
+
+    @Override public int length() {
+      return length;
+    }
+
+    /** Reads from the {@code input} at {@code begin + index}, after checking view bounds. */
+    @Override public char charAt(int index) {
+      if (index < 0) throw new IndexOutOfBoundsException("index < 0");
+      if (index >= length) throw new IndexOutOfBoundsException("index >= length");
+      return input.charAt(begin + index);
+    }
+
+    /**
+     * Returns an empty string for an empty range, {@code this} for the full range, otherwise a
+     * new view over the same {@code input} with indexes shifted by {@code begin}.
+     */
+    @Override public CharSequence subSequence(int beginIndex, int endIndex) {
+      int newLength = regionLength(length, beginIndex, endIndex);
+      if (newLength == 0) return "";
+      if (newLength == length) return this;
+      return new SubSequence(input, beginIndex + begin, endIndex + begin);
+    }
+
+    @Override public String toString() {
+      return new StringBuilder(length).append(input, begin, end).toString();
+    }
+  }
+
+  /**
+   * View over the range {@code [begin, end)} of the {@code input}, excluding the characters in
+   * {@code [beginSkip, endSkip)}.
+   */
+  static final class WithoutSubSequence implements CharSequence {
+    final CharSequence input;
+    // All positions are offsets into input. skipLength is endSkip - beginSkip, and length is
+    // the count of characters visible through this view: end - begin - skipLength.
+    final int begin, beginSkip, endSkip, end, skipLength, length;
+
+    WithoutSubSequence(
+      CharSequence input, int begin, int beginSkip, int endSkip, int end) {
+      this.input = input;
+      this.begin = begin;
+      this.beginSkip = beginSkip;
+      this.endSkip = endSkip;
+      this.end = end;
+      this.skipLength = endSkip - beginSkip;
+      this.length = end - begin - skipLength;
+    }
+
+    @Override public int length() {
+      return length;
+    }
+
+    /** Translates the view index to an {@code input} offset, jumping over the skipped region. */
+    @Override public char charAt(int index) {
+      if (index < 0) throw new IndexOutOfBoundsException("index < 0");
+      if (index >= length) throw new IndexOutOfBoundsException("index >= length");
+      index += begin;
+      // Positions at or past the start of the skipped region are shifted beyond it
+      if (index >= beginSkip) index += skipLength;
+      return input.charAt(index);
+    }
+
+    /**
+     * Returns an empty string for an empty range, {@code this} for the full range, a {@link
+     * SubSequence} when the range lies entirely on one side of the skipped region, otherwise a
+     * narrower {@link WithoutSubSequence} that still excludes the skipped region.
+     */
+    @Override public CharSequence subSequence(int beginIndex, int endIndex) {
+      int newLength = regionLength(length, beginIndex, endIndex);
+      if (newLength == 0) return "";
+      if (newLength == length) return this;
+
+      // Move the input positions to the relative offset
+      beginIndex += begin;
+      endIndex += begin;
+
+      // Check to see if we are before the skipped region
+      if (endIndex <= beginSkip) return new SubSequence(input, beginIndex, endIndex);
+
+      // We now know we either include the skipped region or start after it
+      endIndex += skipLength;
+
+      // If we are after the skipped region, return a subsequence
+      if (beginIndex >= beginSkip) return new SubSequence(input, beginIndex + skipLength, endIndex);
+
+      // We happened to require both sides of the skipped region, so narrow it according to inputs.
+      return new WithoutSubSequence(input, beginIndex, beginSkip, endSkip, endIndex);
+    }
+
+    @Override public String toString() {
+      // Careful here to use .append(input, begin, end), not .append(input.subsequence(begin, end))
+      // The latter can allocate temporary strings, subverting the purpose of using StringBuilder!
+      return new StringBuilder(length)
+        .append(input, begin, beginSkip)
+        .append(input, endSkip, end).toString();
+    }
+  }
+}
diff --git a/brave/src/main/java/brave/internal/codec/EntrySplitter.java b/brave/src/main/java/brave/internal/codec/EntrySplitter.java
@@ -16,8 +16,8 @@
 import brave.internal.Platform;
 
 /**
- * Splits a character sequence that's in a delimited string, optionally trimming optional whitespace
- * (<a href="https://httpwg.org/specs/rfc7230.html#rfc.section.3.2">OWS</a>) before or after
+ * Splits a delimited character sequence, optionally trimming optional whitespace (<a
+ * href="https://httpwg.org/specs/rfc7230.html#rfc.section.3.2">OWS</a>) before or after
  * delimiters.
  *
  * <p>This is intended to be initialized as a constant, as doing so per-request will add
@@ -36,8 +36,8 @@ public static final class Builder {
 
     /**
      * When set, {@link Handler} will be called maximum {@code maxEntries} times per parse. After
-     * that, {@link #parse(Handler, Object, String)} returns false or throws an exception, based on
-     * {@link #shouldThrow(boolean)}. Default: {@link Integer#MAX_VALUE}.
+     * that, {@link #parse(Handler, Object, CharSequence)} returns false or throws an exception,
+     * based on {@link #shouldThrow(boolean)}. Default: {@link Integer#MAX_VALUE}.
      *
      * <p>This is used to implement strict format constraints. For example, above 32 entries is
      * malformed. This is separate from any capacity constraints of the {@link Handler}, which may
@@ -99,7 +99,7 @@ public Builder trimOWSAroundEntrySeparator(boolean trimOWSAroundEntrySeparator)
      * are removed around the {@link #keyValueSeparator(char)}. Default: {@code true}
      *
      * <p>For example, given the input "  k1   =   v1  ,  k2   =   v2  ", this trims around the
-     * "=" character and string boundaries: {@code [("  k1", "v1  "),("  k2", "v2  ")]}.
+     * "=" character and charSequence boundaries: {@code [("  k1", "v1  "),("  k2", "v2  ")]}.
      *
      * @see #trimOWSAroundKeyValueSeparator(boolean)
      */
@@ -110,7 +110,7 @@ public Builder trimOWSAroundKeyValueSeparator(boolean trimOWSAroundKeyValueSepar
 
     /**
      * When {@code true}, when a {@link #keyValueSeparator(char)} does not follow a key, {@link
-     * #parse(Handler, Object, String)} returns false or throws an exception, based on {@link
+     * #parse(Handler, Object, CharSequence)} returns false or throws an exception, based on {@link
      * #shouldThrow(boolean)}. Default: {@code true}.
      *
      * <p>Setting this to {@code false} makes "k1,k2=v2" interpreted the same as if there was
@@ -151,20 +151,20 @@ public interface Handler<T> {
      *
      * <p>After validating, typically strings will be parsed from the input like so:
      * <pre>{@code
-     * String key = input.substring(beginKey, endKey);
-     * String value = input.substring(beginValue, endValue);
+     * String key = input.subSequence(beginKey, endKey).toString();
+     * String value = input.subSequence(beginValue, endValue).toString();
      * }</pre>
      *
-     * @param target receiver of parsed entries
-     * @param input string including data to parse
-     * @param beginKey begin index of the entry's key in {@code input}, inclusive
-     * @param endKey end index of the entry's key in {@code input}, exclusive
+     * @param target     receiver of parsed entries
+     * @param input      character sequence at least as large as the index parameters
+     * @param beginKey   begin index of the entry's key in {@code input}, inclusive
+     * @param endKey     end index of the entry's key in {@code input}, exclusive
      * @param beginValue begin index of the entry's value in {@code input}, inclusive
-     * @param endValue end index of the entry's value in {@code input}, exclusive
+     * @param endValue   end index of the entry's value in {@code input}, exclusive
      * @return true if we reached the {@code endIndex} without failures.
      */
     boolean onEntry(
-        T target, String input, int beginKey, int endKey, int beginValue, int endValue);
+      T target, CharSequence input, int beginKey, int endKey, int beginValue, int endValue);
   }
 
   final char keyValueSeparator, entrySeparator;
@@ -183,31 +183,31 @@ boolean onEntry(
     shouldThrow = builder.shouldThrow;
     missingKey = "Invalid input: no key before '" + keyValueSeparator + "'";
     missingKeyValueSeparator =
-        "Invalid input: missing key value separator '" + keyValueSeparator + "'";
+      "Invalid input: missing key value separator '" + keyValueSeparator + "'";
     overMaxEntries = "Invalid input: over " + maxEntries + " entries";
   }
 
   /**
    * @param handler parses entries emitted upon success
-   * @param target receiver of parsed entries
-   * @param input string including data to parse
+   * @param target  receiver of parsed entries
+   * @param input   character sequence at least as large as the index parameters
    * @return true if we reached the {@code endIndex} without failures.
    */
-  public <T> boolean parse(Handler<T> handler, T target, String input) {
+  public <T> boolean parse(Handler<T> handler, T target, CharSequence input) {
     if (input == null) throw new NullPointerException("input == null");
     return parse(handler, target, input, 0, input.length());
   }
 
   /**
-   * @param handler parses entries emitted upon success
-   * @param target receiver of parsed entries
-   * @param input string including data to parse
+   * @param handler    parses entries emitted upon success
+   * @param target     receiver of parsed entries
+   * @param input      character sequence at least as large as the index parameters
    * @param beginIndex begin index of the {@code input}, inclusive
-   * @param endIndex end index of the {@code input}, exclusive
+   * @param endIndex   end index of the {@code input}, exclusive
    * @return true if we reached the {@code endIndex} without failures.
    */
   public <T> boolean parse(
-      Handler<T> handler, T target, String input, int beginIndex, int endIndex) {
+    Handler<T> handler, T target, CharSequence input, int beginIndex, int endIndex) {
     if (handler == null) throw new NullPointerException("handler == null");
     if (target == null) throw new NullPointerException("target == null");
     if (input == null) throw new NullPointerException("input == null");
@@ -291,7 +291,7 @@ public <T> boolean parse(
     return true;
   }
 
-  static int rewindOWS(String input, int beginIndex, int endIndex) {
+  static int rewindOWS(CharSequence input, int beginIndex, int endIndex) {
     // endIndex is a boundary. we need to begin looking one character before it.
     while (isOWS(input.charAt(endIndex - 1))) {
       if (--endIndex == beginIndex) return beginIndex; // trim whitespace

diff --git a/brave/src/main/java/brave/internal/codec/IpLiteral.java b/brave/src/main/java/brave/internal/codec/IpLiteral.java
@@ -30,7 +30,7 @@ public final class IpLiteral {
     return ip;
   }
 
-  // All the below code is from zipkin2.Endpoint, copy/pasted here to prevent a depedency.
+  // All the below code is from zipkin2.Endpoint, copy/pasted here to prevent a dependency.
   public enum IpFamily {
     Unknown,
     IPv4,

diff --git a/brave/src/main/java/brave/internal/codec/WriteBuffer.java b/brave/src/main/java/brave/internal/codec/WriteBuffer.java
@@ -68,7 +68,7 @@ final int pos() {
     return pos;
   }
 
-  public void writeAscii(String v) {
+  public void writeAscii(CharSequence v) {
     for (int i = 0, length = v.length(); i < length; i++) {
       writeByte(v.charAt(i) & 0xff);
     }

diff --git a/brave/src/main/java/brave/internal/extra/Extra.java b/brave/src/main/java/brave/internal/extra/Extra.java
@@ -21,11 +21,11 @@
 /**
  * Holds extended state in {@link TraceContext#extra()} or {@link TraceContextOrSamplingFlags#extra()}.
  *
- * <p>The implementation of this type uses copy-on-write semantics to prevent changes in a
- * child context from affecting its parent.
+ * <p>Implementations copy-on-write when changing {@linkplain #state} to prevent a child context
+ * from affecting its parent.
  *
  * @param <E> Use a final type as otherwise tools like {@link TraceContext#findExtra(Class)} will
- * not work. In most cases, the type should be package private.
+ *            not work. In most cases, the type should be package private.
  * @param <F> The factory that {@link ExtraFactory#create() creates} this instance.
  */
 // We handle dynamic vs fixed state internally as it..
@@ -66,7 +66,9 @@ protected Extra(F factory) {
    * <p>Ex 1: If state is a map, and ours includes {@code A -> 1, B -> 2} and theirs
    * includes {@code A -> 2, D -> 1}, create a new state of {@code A -> 1, B -> 2, D -> 1}.
    *
-   * <p><em>Note</em>: This operation does not need to {@linkplain #lock lock}.
+   * <p><em>Note</em>: This operation does not need to {@linkplain #lock lock} as long as changes
+   * happen before updating {@link #state}. See <a href="https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/package-summary.html#MemoryVisibility">MemoryVisibility</a>
+   * for details on "happens before" and volatile fields.
    */
   protected abstract void mergeStateKeepingOursOnConflict(E that);
 

diff --git a/brave/src/test/java/brave/features/baggage/SingleHeaderCodec.java b/brave/src/test/java/brave/features/baggage/SingleHeaderCodec.java
@@ -50,10 +50,10 @@ static BaggageCodec get() {
     return ENTRY_SPLITTER.parse(this, valueUpdater, value);
   }
 
-  @Override public boolean onEntry(
-      ValueUpdater target, String buffer, int beginKey, int endKey, int beginValue, int endValue) {
-    BaggageField field = BaggageField.create(buffer.substring(beginKey, endKey));
-    String value = buffer.substring(beginValue, endValue);
+  /**
+   * Converts one parsed entry of {@code buffer} into a baggage field and value, then passes
+   * them to the {@code target}.
+   *
+   * @return the result of {@code target.updateValue(field, value)}
+   */
+  @Override public boolean onEntry(ValueUpdater target,
+    CharSequence buffer, int beginKey, int endKey, int beginValue, int endValue) {
+    // The buffer is a CharSequence, so materialize the key and value ranges as Strings here
+    BaggageField field = BaggageField.create(buffer.subSequence(beginKey, endKey).toString());
+    String value = buffer.subSequence(beginValue, endValue).toString();
     return target.updateValue(field, value);
   }