Skip to content

Commit

Permalink
SQL compiler: Use interned strings (vmware#1148)
Browse files Browse the repository at this point in the history
Signed-off-by: Mihai Budiu <mbudiu@vmware.com>
  • Loading branch information
Mihai Budiu authored Feb 11, 2022
1 parent c4e297a commit 327861e
Show file tree
Hide file tree
Showing 21 changed files with 379 additions and 235 deletions.
14 changes: 7 additions & 7 deletions sql/lib/sql.dl
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ case TIMEZONE_MINUTE:
case TIMEZONE_HOUR:
*/

function sql_substr(s: string, start: signed<64>, end: signed<64>): string {
string_substr(s, start as bit<64>, (start + end) as bit<64>)
function sql_substr(s: istring, start: signed<64>, end: signed<64>): istring {
string_substr(s.ival(), start as bit<64>, (start + end) as bit<64>).intern()
}

function sql_substr_N(s: Option<string>, start: signed<64>, end: signed<64>): Option<string> {
function sql_substr_N(s: Option<istring>, start: signed<64>, end: signed<64>): Option<istring> {
match (s) {
Some{.x = var a} -> Some{sql_substr(a, start, end)},
_ -> None
Expand Down Expand Up @@ -323,15 +323,15 @@ function avg_float_R(sum_count: (float, float)): float {
function avg_double_R(sum_count: (double, double)): double {
if (sum_count.1 == 64'f0.0) 64'f0.0 else (sum_count.0 / sum_count.1)
}
function sql_concat(s0: string, s1: string): string {
s0 ++ s1
function sql_concat(s0: istring, s1: istring): istring {
(s0.ival() ++ s1.ival()).intern()
}
function sql_concat_N(s0: Option<string>, s1: Option<string>): Option<string> {
function sql_concat_N(s0: Option<istring>, s1: Option<istring>): Option<istring> {
match ((s0,s1)) {
(None, None) -> None,
(None, Some{x}) -> None,
(Some{x}, None) -> None,
(Some{x}, Some{y}) -> Some{x ++ y}
(Some{x}, Some{y}) -> Some{sql_concat(x, y)}
}
}

Expand Down
20 changes: 12 additions & 8 deletions sql/src/main/java/com/vmware/ddlog/ir/DDlogEBinOp.java
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,6 @@ public boolean isBoolean() {
public DDlogEBinOp(@Nullable Node node, BOp bop, DDlogExpression left, DDlogExpression right) {
super(node);
this.bop = bop;
this.left = this.checkNull(left);
this.right = this.checkNull(right);
boolean mayBeNull = left.getType().mayBeNull || right.getType().mayBeNull;
switch (this.bop) {
case Eq:
Expand Down Expand Up @@ -159,19 +157,25 @@ public DDlogEBinOp(@Nullable Node node, BOp bop, DDlogExpression left, DDlogExpr
this.type = DDlogType.reduceType(left.getType(), right.getType());
break;
case Concat:
if (!(left.getType() instanceof DDlogTString))
this.error(this.bop + " is not applied to string type: " + left.getType());
if (!(right.getType() instanceof DDlogTString))
this.error(this.bop + " is not applied to string type: " + right.getType());
// Each operand of a concatenation must be a string or an interned string.
// Interned operands are unwrapped with ival(); any other non-string type is
// reported as an error. Both operands are checked with the same rule.
if ((left.getType() instanceof DDlogTIString))
    left = DDlogTIString.ival(left);
else if (!(left.getType() instanceof DDlogTString))
    this.error(this.bop + " is not applied to (i)string type: " + left.getType());
if ((right.getType() instanceof DDlogTIString))
    right = DDlogTIString.ival(right);
else if (!(right.getType() instanceof DDlogTString))
    this.error(this.bop + " is not applied to (i)string type: " + right.getType());
this.type = DDlogType.reduceType(left.getType(), right.getType());
break;
}
this.left = this.checkNull(left);
this.right = this.checkNull(right);
}

@Override
public String toString() {
return "(" + this.left.toString() + " " + this.bop.toString() +
" " + this.right.toString() + ")";
return "(" + this.left + " " + this.bop +
" " + this.right + ")";
}

}
51 changes: 51 additions & 0 deletions sql/src/main/java/com/vmware/ddlog/ir/DDlogEIString.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright (c) 2021 VMware, Inc.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/

package com.vmware.ddlog.ir;

import com.facebook.presto.sql.tree.Node;

import javax.annotation.Nullable;

/**
 * An expression that is an interned string literal.
 *
 * <p>{@link #toString()} renders the literal with an {@code i} prefix:
 * text containing <code>${</code> is emitted as {@code i[|...|]}, any other
 * text as {@code i"..."}.
 */
public class DDlogEIString extends DDlogExpression {
    private final String string;

    /**
     * Creates an interned string literal whose type is {@code DDlogTIString.instance}.
     *
     * @param node   node passed to the superclass constructor; may be null.
     * @param string literal text; one pair of enclosing single quotes, when present,
     *               is removed before the text is stored.
     */
    public DDlogEIString(@Nullable Node node, String string) {
        super(node, DDlogTIString.instance);
        // A lone "'" both starts and ends with a quote; without the length check
        // the stripping below would call substring(1, 0) and throw.
        if (string.length() >= 2 && string.startsWith("'") && string.endsWith("'"))
            string = string.substring(1, string.length() - 1);
        this.string = string;
    }

    /**
     * Renders the literal; the bracketed form is used when the text contains
     * <code>${</code>, the quoted form otherwise.
     */
    @Override
    public String toString() {
        if (this.string.contains("${")) {
            return "i[|" + this.string + "|]";
        }
        return "i\"" + this.string + "\"";
    }
}
63 changes: 63 additions & 0 deletions sql/src/main/java/com/vmware/ddlog/ir/DDlogTIString.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2021 VMware, Inc.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/

package com.vmware.ddlog.ir;

import com.facebook.presto.sql.tree.Node;

import javax.annotation.Nullable;

/**
 * The type of interned strings; printed as {@code istring}.
 */
public class DDlogTIString extends DDlogType implements IDDlogBaseType {
    public static DDlogTIString instance = new DDlogTIString(null,false);

    private DDlogTIString(@Nullable Node node, boolean mayBeNull) {
        super(node, mayBeNull);
    }

    /**
     * Prints the type name, passed through {@code wrapOption}.
     */
    @Override
    public String toString() {
        return this.wrapOption("istring");
    }

    /**
     * Returns this object when the requested nullability already matches,
     * otherwise a new instance on the same node with the requested nullability.
     */
    @Override
    public DDlogType setMayBeNull(boolean mayBeNull) {
        return (mayBeNull == this.mayBeNull)
                ? this
                : new DDlogTIString(this.getNode(), mayBeNull);
    }

    /**
     * True when the superclass comparison succeeds and the other type is
     * also an interned string type.
     */
    @Override
    public boolean same(DDlogType type) {
        return super.same(type) && type.is(DDlogTIString.class);
    }

    /**
     * Wraps an istring-typed expression in a call to {@code ival}; the resulting
     * expression has type {@code DDlogTString.instance}.
     */
    public static DDlogExpression ival(DDlogExpression expression) {
        assert(expression.getType().is(DDlogTIString.class));
        final DDlogExpression unwrapped = new DDlogEApply(
                expression.node, "ival", DDlogTString.instance, true, expression);
        return unwrapped;
    }
}
8 changes: 8 additions & 0 deletions sql/src/main/java/com/vmware/ddlog/ir/DDlogTString.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,12 @@ public boolean same(DDlogType type) {
return false;
return type.is(DDlogTString.class);
}

/**
 * Wraps a string-typed expression in a call to {@code intern}; the resulting
 * expression has type {@code DDlogTIString.instance}.
 */
public static DDlogExpression intern(DDlogExpression expression) {
    assert(expression.getType().is(DDlogTString.class));
    final DDlogExpression interned = new DDlogEApply(
            expression.node, "intern", DDlogTIString.instance, true, expression);
    return interned;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,30 +137,30 @@ protected DDlogExpression visitCast(Cast node, TranslationContext context) {
DDlogExpression e = this.process(node.getExpression(), context);
DDlogType eType = e.getType();
DDlogType destType = SqlSemantics.createType(node, node.getType(), e.getType().mayBeNull);
if (destType.is(DDlogTString.class)) {
if (destType.is(DDlogTIString.class)) {
// convert to string
if (eType.is(DDlogTString.class)) {
if (eType.is(DDlogTIString.class)) {
return e;
} else if (eType.is(DDlogTSigned.class) ||
eType.is(DDlogTBool.class) ||
eType.is(DDlogTFloat.class) ||
eType.is(DDlogTDouble.class) ||
eType.is(DDlogTUser.class)) {
Function<DDlogExpression, DDlogExpression> wrapper =
ex -> new DDlogEString(node, "${" + ex.toString() + "}");
ex -> new DDlogEIString(node, "${" + ex.toString() + "}");
return wrapInMatch(e, destType, wrapper);
} else {
throw new TranslationException("Unsupported cast to string", node);
}
} else if (destType.is(DDlogTFloat.class) || destType.is(DDlogTDouble.class)) {
IsNumericType num = destType.toNumeric();
if (eType.is(DDlogTString.class)) {
if (eType.is(DDlogTIString.class)) {
// Pick the parser from the destination type. In this branch eType is an
// istring, so testing eType against DDlogTFloat could never select "f".
String suffix = destType.is(DDlogTFloat.class) ? "f" : "d";
// I am lying here, the result is actually Result<>,
// but the unwrap below will remove it.
Function<DDlogExpression, DDlogExpression> wrapper = ex -> {
DDlogExpression parse = new DDlogEApply(node,
"parse_" + suffix, destType.setMayBeNull(true), ex);
"parse_" + suffix, destType.setMayBeNull(true), DDlogTIString.ival(ex));
return new DDlogEApply(node,
"result_unwrap_or_default", destType, parse);
};
Expand Down Expand Up @@ -192,6 +192,14 @@ protected DDlogExpression visitCast(Cast node, TranslationContext context) {
"option_unwrap_or_default", destType, parse);
};
return wrapInMatch(e, destType, wrapper);
} else if (eType.is(DDlogTIString.class)) {
Function<DDlogExpression, DDlogExpression> wrapper = ex -> {
DDlogExpression parse = new DDlogEApply(node,
"parse_dec_i64", DDlogTSigned.signed64.setMayBeNull(true), DDlogTIString.ival(ex));
return new DDlogEApply(node,
"option_unwrap_or_default", destType, parse);
};
return wrapInMatch(e, destType, wrapper);
} else if (eType.is(DDlogTBool.class)) {
Function<DDlogExpression, DDlogExpression> wrapper = ex -> new DDlogEITE(node, ex, num.one(), num.zero());
return wrapInMatch(e, destType, wrapper);
Expand All @@ -215,10 +223,11 @@ protected DDlogExpression visitCast(Cast node, TranslationContext context) {
DDlogType exType = ex.getType();
// At least in MySQL integers are converted to dates as if they were strings...
if (exType.is(DDlogTInt.class) || exType.is(DDlogTSigned.class) || exType.is(DDlogTBit.class)) {
ex = new DDlogEString(node, "${" + e.toString() + "}");
ex = new DDlogEIString(node, "${" + e.toString() + "}");
exType = ex.getType();
}
if (exType.is(DDlogTString.class)) {
if (exType.is(DDlogTIString.class)) {
ex = DDlogTIString.ival(ex);
String parseFunc;
switch (tu.getName()) {
case "Date":
Expand Down Expand Up @@ -510,7 +519,7 @@ private static DDlogType functionResultType(Node node, String function, List<DDl
return DDlogTSigned.signed64.setMayBeNull(args.get(0).getType().mayBeNull);
case "concat":
boolean mayBeNull = Linq.any(args, a -> a.getType().mayBeNull);
return DDlogTString.instance.setMayBeNull(mayBeNull);
return DDlogTIString.instance.setMayBeNull(mayBeNull);
case "array_agg":
if (args.size() != 1)
throw new TranslationException("Expected exactly 1 argument for aggregate", node);
Expand Down Expand Up @@ -721,6 +730,6 @@ else if (codePoint <= 0xFFFF) {
@Override
protected DDlogExpression visitStringLiteral(StringLiteral node, TranslationContext context) {
String s = formatStringLiteral(node.getValue());
return new DDlogEString(node, s);
return new DDlogEIString(node, s);
}
}
16 changes: 12 additions & 4 deletions sql/src/main/java/com/vmware/ddlog/translator/SqlSemantics.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public static DDlogType createType(Node node, String sqltype, boolean mayBeNull)
} else if (sqltype.equals("integer") || sqltype.equals("int")) {
type = DDlogTSigned.signed64;
} else if (sqltype.startsWith("varchar")) {
type = DDlogTString.instance;
type = DDlogTIString.instance;
} else if (sqltype.equals("bigint")) {
type = DDlogTInt.instance;
} else if (sqltype.equals("real")) {
Expand Down Expand Up @@ -140,7 +140,7 @@ public DDlogProgram generateLibrary() {
DDlogType raw;
DDlogType withNull;
if (h.equals(this.stringFunctions)) {
raw = DDlogTString.instance;
raw = DDlogTIString.instance;
} else if (op.isBoolean()) {
raw = DDlogTBool.instance;
} else {
Expand Down Expand Up @@ -182,7 +182,15 @@ function add(left: Option<bigint>, right: Option<bigint>): Option<bigint> =
if (i == 0) {
def = new DDlogEBinOp(null, op,
new DDlogEVar(null,"left", raw), new DDlogEVar(null,"right", raw));
if (type.is(DDlogTIString.class)) {
def = new DDlogEApply(def.getNode(), "intern", DDlogTIString.instance, true, def);
}
} else {
def = new DDlogEBinOp(null, op,
new DDlogEVar(null, "l", raw), new DDlogEVar(null, "r", raw));
if (type.is(DDlogTIString.class)) {
def = new DDlogEApply(def.getNode(), "intern", DDlogTIString.instance, true, def);
}
def = new DDlogEMatch(null,
new DDlogETuple(null,
new DDlogEVar(null,"left", leftType),
Expand All @@ -191,15 +199,15 @@ function add(left: Option<bigint>, right: Option<bigint>): Option<bigint> =
new DDlogEMatch.Case(null,
new DDlogETuple(null, leftMatch, rightMatch),
ExpressionTranslationVisitor.wrapSome(
new DDlogEBinOp(null, op,
new DDlogEVar(null, "l", raw), new DDlogEVar(null, "r", raw)), type)),
def, type)),
new DDlogEMatch.Case(null,
new DDlogETuple(null,
new DDlogEPHolder(null),
new DDlogEPHolder(null)),
new DDlogENull(null, type)))
);
}

DDlogFunction func = new DDlogFunction(null, function, type, def, left, right);
result.functions.add(func);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,9 @@ private DDlogExpression aggregateInitializer(FunctionCall f, String aggregate, D
if (dataType.mayBeNull)
return none;
DDlogExpression t = new DDlogEBool(f, true);
if (dataType instanceof DDlogTString)
return new DDlogETuple(f, t, new DDlogEString(f,""));
if (dataType instanceof DDlogTString
|| dataType instanceof DDlogTIString)
return new DDlogETuple(f, t, new DDlogEIString(f,""));
IsNumericType num = dataType.toNumeric();
return new DDlogETuple(f, t, num.zero());
}
Expand Down
12 changes: 6 additions & 6 deletions sql/src/test/java/ddlog/AggregatesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,9 @@ public void testTwoAggregations() {
public void testStringAggregate() {
String query = "create view v0 as SELECT MIN(column2) AS min FROM t1";
String program = this.header(false) +
"typedef TRtmp = TRtmp{min:string}\n" +
"typedef TRtmp = TRtmp{min:istring}\n" +
"function agg(g: Group<(), TRt1>):TRtmp {\n" +
"var min = (true, \"\"): (bool, string);\n" +
"var min = (true, i\"\"): (bool, istring);\n" +
"(for ((i, _) in g) {\n" +
"var v = i;\n" +
"(var incr = v.column2);\n" +
Expand Down Expand Up @@ -454,7 +454,7 @@ public void testMaxCase() {
"var max = (true, 64'sd0): (bool, signed<64>);\n" +
"(for ((i, _) in g) {\n" +
"var v = i;\n" +
"(var incr = if ((v.column2 == \"foo\")) {\n" +
"(var incr = if ((v.column2 == i\"foo\")) {\n" +
"v.column1} else {\n" +
"64'sd0});\n" +
"(max = agg_max_R(max, incr))}\n" +
Expand Down Expand Up @@ -491,9 +491,9 @@ public void testMax() {
public void arrayAggTest() {
String query = "create view v1 as select array_agg(column2) from t1";
String program = this.header(false) +
"typedef TRtmp = TRtmp{col0:Vec<string>}\n" +
"typedef TRtmp = TRtmp{col0:Vec<istring>}\n" +
"function agg(g: Group<(), TRt1>):TRtmp {\n" +
"var array_agg = vec_empty(): Vec<string>;\n" +
"var array_agg = vec_empty(): Vec<istring>;\n" +
"(for ((i, _) in g) {\n" +
"var v = i;\n" +
"(var incr = v.column2);\n" +
Expand All @@ -514,7 +514,7 @@ public void arrayLengthTest() {
String program = this.header(false) +
"typedef TRtmp = TRtmp{col0:signed<64>}\n" +
"function agg(g: Group<(), TRt1>):TRtmp {\n" +
"var array_agg = vec_empty(): Vec<string>;\n" +
"var array_agg = vec_empty(): Vec<istring>;\n" +
"(for ((i, _) in g) {\n" +
"var v = i;\n" +
"(var incr = v.column2);\n" +
Expand Down
Loading

0 comments on commit 327861e

Please sign in to comment.