Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: remove default exist condition for groupby values in favour of … #5420

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions pkg/query-service/app/logs/v3/query_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,18 +205,6 @@ func buildLogsTimeSeriesFilterQuery(fs *v3.FilterSet, groupBy []v3.AttributeKey,
}
}

// add group by conditions to filter out log lines which doesn't have the key
for _, attr := range groupBy {
if !attr.IsColumn {
columnType := getClickhouseLogsColumnType(attr.Type)
columnDataType := getClickhouseLogsColumnDataType(attr.DataType)
conditions = append(conditions, fmt.Sprintf("has(%s_%s_key, '%s')", columnType, columnDataType, attr.Key))
} else if attr.Type != v3.AttributeKeyTypeUnspecified {
// for materialzied columns
conditions = append(conditions, fmt.Sprintf("%s_exists`=true", strings.TrimSuffix(getClickhouseColumnName(attr), "`")))
}
}

// add conditions for aggregate attribute
if aggregateAttribute.Key != "" {
existsFilter := GetExistsNexistsFilter(v3.FilterOperatorExists, v3.FilterItem{Key: aggregateAttribute})
Expand Down
39 changes: 14 additions & 25 deletions pkg/query-service/app/logs/v3/query_builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,15 +221,15 @@ var timeSeriesFilterQueryData = []struct {
{Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"},
}},
GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}},
ExpectedFilter: "attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%' AND has(attributes_string_key, 'host')",
ExpectedFilter: "attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%'",
},
{
Name: "Test groupBy isColumn",
FilterSet: &v3.FilterSet{Operator: "AND", Items: []v3.FilterItem{
{Key: v3.AttributeKey{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}, Value: "102.", Operator: "ncontains"},
}},
GroupBy: []v3.AttributeKey{{Key: "host", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag, IsColumn: true}},
ExpectedFilter: "attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%' AND `attribute_string_host_exists`=true",
ExpectedFilter: "attributes_string_value[indexOf(attributes_string_key, 'host')] NOT ILIKE '%102.%'",
},
{
Name: "Wrong data",
Expand Down Expand Up @@ -413,7 +413,7 @@ var testBuildLogsQueryData = []struct {
TableName: "logs",
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'host.name')] as `host.name`, " +
"toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'method.name')]))) as value from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(attributes_string_key, 'host.name') AND has(attributes_string_key, 'method.name') " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(attributes_string_key, 'method.name') " +
"group by `host.name`,ts order by `host.name` ASC",
},
{
Expand All @@ -434,7 +434,7 @@ var testBuildLogsQueryData = []struct {
TableName: "logs",
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, `attribute_string_host$$name` as `host.name`, toFloat64(count(distinct(`attribute_string_method$$name`))) as value" +
" from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND `attribute_string_host$$name_exists`=true AND `attribute_string_method$$name_exists`=true " +
"AND `attribute_string_method$$name_exists`=true " +
"group by `host.name`,ts " +
"order by `host.name` ASC",
},
Expand Down Expand Up @@ -463,7 +463,6 @@ var testBuildLogsQueryData = []struct {
"toFloat64(count(distinct(`attribute_string_name`))) as value from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND resources_string_value[indexOf(resources_string_key, 'x')] != 'abc' " +
"AND has(attributes_string_key, 'method') " +
"AND `attribute_string_name_exists`=true " +
"group by `method`,ts " +
"order by `method` ASC",
Expand Down Expand Up @@ -494,8 +493,6 @@ var testBuildLogsQueryData = []struct {
"toFloat64(count(distinct(`attribute_string_name`))) as value from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND resources_string_value[indexOf(resources_string_key, 'x')] != 'abc' " +
"AND has(attributes_string_key, 'method') " +
"AND has(resources_string_key, 'x') " +
"AND `attribute_string_name_exists`=true " +
"group by `method`,`x`,ts " +
"order by `method` ASC,`x` ASC",
Expand Down Expand Up @@ -525,7 +522,6 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND has(attributes_string_key, 'method') " +
"AND has(attributes_float64_key, 'bytes') " +
"group by `method`,ts " +
"order by `method` ASC",
Expand Down Expand Up @@ -555,7 +551,6 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND has(attributes_string_key, 'method') " +
"AND `attribute_float64_bytes_exists`=true " +
"group by `method`,ts " +
"order by `method` ASC",
Expand Down Expand Up @@ -585,7 +580,6 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND has(attributes_string_key, 'method') " +
"AND `attribute_float64_bytes_exists`=true " +
"group by `method`,ts " +
"order by `method` ASC",
Expand Down Expand Up @@ -615,7 +609,6 @@ var testBuildLogsQueryData = []struct {
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' " +
"AND has(attributes_string_key, 'method') " +
"AND `attribute_float64_bytes_exists`=true " +
"group by `method`,ts " +
"order by `method` ASC",
Expand All @@ -641,7 +634,6 @@ var testBuildLogsQueryData = []struct {
"quantile(0.05)(`attribute_float64_bytes`) as value " +
"from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND has(attributes_string_key, 'method') " +
"AND `attribute_float64_bytes_exists`=true " +
"group by `method`,ts " +
"order by `method` ASC",
Expand All @@ -666,7 +658,6 @@ var testBuildLogsQueryData = []struct {
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`" +
", sum(`attribute_float64_bytes`)/1.000000 as value from signoz_logs.distributed_logs " +
"where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND has(attributes_string_key, 'method') " +
"AND `attribute_float64_bytes_exists`=true " +
"group by `method`,ts order by `method` ASC",
},
Expand All @@ -690,7 +681,6 @@ var testBuildLogsQueryData = []struct {
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`" +
", count(attributes_float64_value[indexOf(attributes_float64_key, 'bytes')])/60.000000 as value " +
"from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND has(attributes_string_key, 'method') " +
"AND has(attributes_float64_key, 'bytes') " +
"group by `method`,ts " +
"order by `method` ASC",
Expand All @@ -716,7 +706,6 @@ var testBuildLogsQueryData = []struct {
"attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, " +
"sum(attributes_float64_value[indexOf(attributes_float64_key, 'bytes')])/1.000000 as value " +
"from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) " +
"AND has(attributes_string_key, 'method') " +
"AND has(attributes_float64_key, 'bytes') " +
"group by `method`,ts " +
"order by `method` ASC",
Expand Down Expand Up @@ -910,7 +899,7 @@ var testBuildLogsQueryData = []struct {
},
},
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(attributes_string_key, 'name') group by `name` order by value DESC",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) group by `name` order by value DESC",
},
{
Name: "TABLE: Test rate with groupBy",
Expand All @@ -927,7 +916,7 @@ var testBuildLogsQueryData = []struct {
},
},
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, count()/97.000000 as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(attributes_string_key, 'name') group by `name` order by value DESC",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, count()/97.000000 as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) group by `name` order by value DESC",
},
{
Name: "TABLE: Test count with groupBy, orderBy",
Expand All @@ -947,7 +936,7 @@ var testBuildLogsQueryData = []struct {
},
},
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(attributes_string_key, 'name') group by `name` order by `name` DESC",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) group by `name` order by `name` DESC",
},
{
Name: "TABLE: Test count with JSON Filter, groupBy, orderBy",
Expand Down Expand Up @@ -981,7 +970,7 @@ var testBuildLogsQueryData = []struct {
},
},
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%a%' AND has(attributes_string_key, 'name') group by `name` order by `name` DESC",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND JSON_EXISTS(body, '$.\"message\"') AND JSON_VALUE(body, '$.\"message\"') ILIKE '%a%' group by `name` order by `name` DESC",
},
{
Name: "TABLE: Test count with JSON Filter Array, groupBy, orderBy",
Expand Down Expand Up @@ -1015,7 +1004,7 @@ var testBuildLogsQueryData = []struct {
},
},
TableName: "logs",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service') AND has(attributes_string_key, 'name') group by `name` order by `name` DESC",
ExpectedQuery: "SELECT now() as ts, attributes_string_value[indexOf(attributes_string_key, 'name')] as `name`, toFloat64(count(*)) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726210000 AND timestamp <= 1680066458000000000) AND has(JSONExtract(JSON_QUERY(body, '$.\"requestor_list\"[*]'), 'Array(String)'), 'index_service') group by `name` order by `name` DESC",
},
}

Expand Down Expand Up @@ -1198,7 +1187,7 @@ var testPrepLogsQueryData = []struct {
GroupBy: []v3.AttributeKey{{Key: "method", DataType: v3.AttributeKeyDataTypeString, Type: v3.AttributeKeyTypeTag}},
},
TableName: "logs",
ExpectedQuery: "SELECT `method` from (SELECT attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'method') AND has(attributes_string_key, 'name') group by `method` order by value DESC) LIMIT 10",
ExpectedQuery: "SELECT `method` from (SELECT attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'name') group by `method` order by value DESC) LIMIT 10",
Options: Options{GraphLimitQtype: constants.FirstQueryGraphLimit, PreferRPM: true},
},
{
Expand All @@ -1221,7 +1210,7 @@ var testPrepLogsQueryData = []struct {
OrderBy: []v3.OrderBy{{ColumnName: constants.SigNozOrderByValue, Order: "ASC"}},
},
TableName: "logs",
ExpectedQuery: "SELECT `method` from (SELECT attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'method') AND has(attributes_string_key, 'name') group by `method` order by value ASC) LIMIT 10",
ExpectedQuery: "SELECT `method` from (SELECT attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'name') group by `method` order by value ASC) LIMIT 10",
Options: Options{GraphLimitQtype: constants.FirstQueryGraphLimit, PreferRPM: true},
},
{
Expand All @@ -1244,7 +1233,7 @@ var testPrepLogsQueryData = []struct {
OrderBy: []v3.OrderBy{{ColumnName: "method", Order: "ASC"}},
},
TableName: "logs",
ExpectedQuery: "SELECT `method` from (SELECT attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'method') AND has(attributes_string_key, 'name') group by `method` order by `method` ASC) LIMIT 10",
ExpectedQuery: "SELECT `method` from (SELECT attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'name') group by `method` order by `method` ASC) LIMIT 10",
Options: Options{GraphLimitQtype: constants.FirstQueryGraphLimit, PreferRPM: true},
},
{
Expand All @@ -1266,7 +1255,7 @@ var testPrepLogsQueryData = []struct {
Limit: 2,
},
TableName: "logs",
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'method') AND has(attributes_string_key, 'name') AND (`method`) GLOBAL IN (#LIMIT_PLACEHOLDER) group by `method`,ts order by value DESC",
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'name') AND (`method`) GLOBAL IN (#LIMIT_PLACEHOLDER) group by `method`,ts order by value DESC",
Options: Options{GraphLimitQtype: constants.SecondQueryGraphLimit},
},
{
Expand All @@ -1289,7 +1278,7 @@ var testPrepLogsQueryData = []struct {
Limit: 2,
},
TableName: "logs",
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'method') AND has(attributes_string_key, 'name') AND (`method`) GLOBAL IN (#LIMIT_PLACEHOLDER) group by `method`,ts order by `method` ASC",
ExpectedQuery: "SELECT toStartOfInterval(fromUnixTimestamp64Nano(timestamp), INTERVAL 60 SECOND) AS ts, attributes_string_value[indexOf(attributes_string_key, 'method')] as `method`, toFloat64(count(distinct(attributes_string_value[indexOf(attributes_string_key, 'name')]))) as value from signoz_logs.distributed_logs where (timestamp >= 1680066360726000000 AND timestamp <= 1680066458000000000) AND attributes_string_value[indexOf(attributes_string_key, 'method')] = 'GET' AND has(attributes_string_key, 'name') AND (`method`) GLOBAL IN (#LIMIT_PLACEHOLDER) group by `method`,ts order by `method` ASC",
Options: Options{GraphLimitQtype: constants.SecondQueryGraphLimit},
},
// Live tail
Expand Down
Loading
Loading