Skip to content

Commit

Permalink
Rollback GraphiteSanitize to replacing whitespaces (#1099)
Browse files Browse the repository at this point in the history
As reported in #1098 the current sanitizer is too aggressive. It strips
dots which play an important role in the Graphite metrics name convention.

Until we develop a better algorithm, let's do only basic sanitization.
  • Loading branch information
arteam committed Mar 10, 2017
1 parent d170988 commit 9e47bca
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 82 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@
import java.net.Socket;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.util.regex.Pattern;

/**
* A client to a Carbon server via TCP.
*/
public class Graphite implements GraphiteSender {
private static final Pattern WHITESPACE = Pattern.compile("[\\s]+");
// this may be optimistic about Carbon/Graphite
private static final Charset UTF_8 = Charset.forName("UTF-8");

Expand Down Expand Up @@ -186,6 +184,6 @@ public void close() throws IOException {
}

protected String sanitize(String s) {
return WHITESPACE.matcher(s).replaceAll("-");
return GraphiteSanitize.sanitize(s);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ public int getFailures() {
}

public String sanitize(String s) {
return GraphiteSanitize.sanitize(s, '-');
return GraphiteSanitize.sanitize(s);
}

}
Original file line number Diff line number Diff line change
@@ -1,71 +1,16 @@
package com.codahale.metrics.graphite;

class GraphiteSanitize {
/** Replaces all characters from a given string that are not ascii and not alphanumeric
* with a dash */
static String sanitize(String string, char replacement) {
String replaced = replaceFrom(string, replacement);

// Consolidate multiple dashes into a single one
String result = replaced.replace("--", "-");
while (!result.equals(replaced)) {
replaced = result;
result = replaced.replace("--", "-");
}

// Remove any leading or trailing dashes
return strip(result, replacement);
}

/** A char matches when it is a letter or digit and it is ASCII, in Guava terminology,
* this would be CharMatcher.ASCII.and(CharMatcher.JAVA_LETTER_OR_DIGIT).negate() */
private static boolean matches(char c) {
return !(Character.isLetterOrDigit(c) && c <= '\u007f');
}
import java.util.regex.Pattern;

/** Replace all characters that we're interested in with a replacement character,
* heavily inspired by the same code in Guava's CharMatcher */
private static String replaceFrom(String string, char replacement) {
int pos = indexIn(string, 0);
if (pos == -1) {
return string;
}
char[] chars = string.toCharArray();
chars[pos] = replacement;
for (int i = pos + 1; i < chars.length; i++) {
if (matches(chars[i])) {
chars[i] = replacement;
}
}
return new String(chars).trim();
}
class GraphiteSanitize {

/** Finds the first index (or -1) of a character we're interested in */
private static int indexIn(String sequence, int start) {
int length = sequence.length();
for (int i = start; i < length; i++) {
if (matches(sequence.charAt(i))) {
return i;
}
}
return -1;
}
private static final Pattern WHITESPACE = Pattern.compile("[\\s]+");
private static final String DASH = "-";

/** Strips a given character from the beginning and end of a string,
* heavily inspired by Apache's StringUtils.strip
/**
* Trims the string and peplaces all whitespace characters with the provided symbol
*/
private static String strip(String str, char strip) {
int strLen = str.length();
int start = 0;
int end = strLen - 1;
while (start != strLen && str.charAt(start) == strip) {
start++;
}

while (end > start && str.charAt(end) == strip) {
end--;
}

return start != 0 || end != strLen - 1 ? str.substring(start, end + 1) : str;
static String sanitize(String string) {
return WHITESPACE.matcher(string.trim()).replaceAll(DASH);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public void close() throws IOException {
}

protected String sanitize(String s) {
return GraphiteSanitize.sanitize(s, '-');
return GraphiteSanitize.sanitize(s);
}

DatagramChannel getDatagramChannel() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ static class MetricTuple {
}

protected String sanitize(String s) {
return GraphiteSanitize.sanitize(s, '-');
return GraphiteSanitize.sanitize(s);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ public class GraphiteSanitizeTest {
public void sanitizeGraphiteValues() {
SoftAssertions softly = new SoftAssertions();

softly.assertThat(GraphiteSanitize.sanitize("Foo Bar", '-')).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize(" Foo Bar ", '-')).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize(" Foo Bar", '-')).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize("Foo Bar ", '-')).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize(" Foo Bar ", '-')).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize("Foo@Bar", '-')).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize("Foó Bar", '-')).isEqualTo("Fo-Bar");
softly.assertThat(GraphiteSanitize.sanitize("||ó/.", '-')).isEqualTo("");
softly.assertThat(GraphiteSanitize.sanitize("${Foo:Bar:baz}", '-')).isEqualTo("Foo-Bar-baz");
softly.assertThat(GraphiteSanitize.sanitize("St. Foo's of Bar", '-')).isEqualTo("St-Foo-s-of-Bar");
softly.assertThat(GraphiteSanitize.sanitize("(Foo and (Bar and (Baz)))", '-')).isEqualTo("Foo-and-Bar-and-Baz");
softly.assertThat(GraphiteSanitize.sanitize("Foo.bar.baz", '-')).isEqualTo("Foo-bar-baz");
softly.assertThat(GraphiteSanitize.sanitize("FooBar", '-')).isEqualTo("FooBar");
softly.assertThat(GraphiteSanitize.sanitize("Foo Bar")).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize(" Foo Bar ")).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize(" Foo Bar")).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize("Foo Bar ")).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize(" Foo Bar ")).isEqualTo("Foo-Bar");
softly.assertThat(GraphiteSanitize.sanitize("Foo@Bar")).isEqualTo("Foo@Bar");
softly.assertThat(GraphiteSanitize.sanitize("Foó Bar")).isEqualTo("Foó-Bar");
softly.assertThat(GraphiteSanitize.sanitize("||ó/.")).isEqualTo("||ó/.");
softly.assertThat(GraphiteSanitize.sanitize("${Foo:Bar:baz}")).isEqualTo("${Foo:Bar:baz}");
softly.assertThat(GraphiteSanitize.sanitize("St. Foo's of Bar")).isEqualTo("St.-Foo's-of-Bar");
softly.assertThat(GraphiteSanitize.sanitize("(Foo and (Bar and (Baz)))")).isEqualTo("(Foo-and-(Bar-and-(Baz)))");
softly.assertThat(GraphiteSanitize.sanitize("Foo.bar.baz")).isEqualTo("Foo.bar.baz");
softly.assertThat(GraphiteSanitize.sanitize("FooBar")).isEqualTo("FooBar");

softly.assertAll();
}
Expand Down

0 comments on commit 9e47bca

Please sign in to comment.