(improvement)(chat) Special handling for count_distinct operator during SQL correcting and explaining (#320)

This commit is contained in:
yangde
2023-11-04 12:58:25 +08:00
committed by GitHub
parent b8989e204f
commit 2fe56e7462
4 changed files with 121 additions and 2 deletions

View File

@@ -41,5 +41,16 @@ public enum AggOperatorEnum {
return AggOperatorEnum.UNKNOWN;
}
/**
 * Tells whether the given aggregation type names the count_distinct operator.
 *
 * <p>The check is case-insensitive: {@code aggType} is upper-cased and compared with
 * {@code COUNT_DISTINCT.getOperator()}. Upper-casing uses {@link java.util.Locale#ROOT} so the
 * result does not depend on the JVM default locale (under a Turkish default locale a plain
 * {@code toUpperCase()} maps {@code i} to a dotted capital I, and "count_distinct" would never
 * match).
 *
 * <p>Why callers need this distinction:
 * <ol>
 *   <li>the outer SQL renders the {@code count_distinct(field)} operator as
 *       {@code count(DISTINCT field)};</li>
 *   <li>the table SQL generates its aggregation ignoring the count_distinct operator.</li>
 * </ol>
 *
 * @param aggType aggregation type name; may be {@code null}
 * @return {@code true} if {@code aggType} is the count_distinct operator (ignoring case),
 *         {@code false} otherwise, including when {@code aggType} is {@code null}
 */
public static boolean isCountDistinct(String aggType) {
    return null != aggType
            && aggType.toUpperCase(java.util.Locale.ROOT).equals(COUNT_DISTINCT.getOperator());
}
}

View File

@@ -5,6 +5,8 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import com.tencent.supersonic.common.pojo.enums.AggOperatorEnum;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
@@ -74,7 +76,12 @@ public class SqlParserSelectFunctionHelper {
return null;
}
Function sumFunction = new Function();
sumFunction.setName(aggregateName);
if (AggOperatorEnum.isCountDistinct(aggregateName)) {
sumFunction.setName("count");
sumFunction.setDistinct(true);
} else {
sumFunction.setName(aggregateName);
}
sumFunction.setParameters(new ExpressionList(expression));
return sumFunction;
}

View File

@@ -217,6 +217,106 @@ class SqlParserAddHelperTest {
replaceSql);
}
/**
 * Checks that a field mapped to the "count_distinct" aggregate is rewritten to
 * count(DISTINCT field) in the SELECT, WHERE and ORDER BY clauses, and that the GROUP BY on
 * "department" is added when missing.
 *
 * Each table row below is an {input SQL, expected SQL} pair; rows are executed in order through
 * addAggregateToField followed by addGroupBy. The last scenario additionally runs replaceAlias
 * first, so it is handled separately after the loop.
 */
@Test
void addAggregateToCountDiscountMetricField() {
    Map<String, String> aggregateByField = new HashMap<>();
    aggregateByField.put("uv", "count_distinct");
    Set<String> groupColumns = new HashSet<>();
    groupColumns.add("department");

    String[][] scenarios = {
        // metric only in SELECT and ORDER BY
        {
            "select department, uv from t_1 where sys_imp_date = '2023-09-11' order by uv desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE sys_imp_date = '2023-09-11' "
                    + "GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        },
        // metric also used in a WHERE condition next to a date filter
        {
            "select department, uv from t_1 where sys_imp_date = '2023-09-11' and uv >1 "
                    + "order by uv desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE sys_imp_date = '2023-09-11' "
                    + "AND count(DISTINCT uv) > 1 GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        },
        // metric is the only WHERE condition
        {
            "select department, uv from t_1 where uv >1 order by uv desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE count(DISTINCT uv) > 1 "
                    + "GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        },
        // WHERE already contains count(DISTINCT uv)
        {
            "select department, uv from t_1 where count(DISTINCT uv) >1 order by uv desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE count(DISTINCT uv) > 1 "
                    + "GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        },
        // input is already fully aggregated and grouped
        {
            "select department, count(DISTINCT uv) from t_1 where sys_imp_date = '2023-09-11' and count(DISTINCT uv) >1 "
                    + "GROUP BY department order by count(DISTINCT uv) desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE sys_imp_date = '2023-09-11' "
                    + "AND count(DISTINCT uv) > 1 GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        },
        // only ORDER BY is pre-aggregated; GROUP BY already present
        {
            "select department, uv from t_1 where sys_imp_date = '2023-09-11' and uv >1 "
                    + "GROUP BY department order by count(DISTINCT uv) desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE sys_imp_date = '2023-09-11' "
                    + "AND count(DISTINCT uv) > 1 GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        },
        // metric condition mixed with a non-metric condition on department
        {
            "select department, uv from t_1 where sys_imp_date = '2023-09-11' and uv >1 and department = 'HR' "
                    + "GROUP BY department order by uv desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE sys_imp_date = '2023-09-11' AND count(DISTINCT uv) > 1 "
                    + "AND department = 'HR' GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        },
        // metric condition inside a parenthesised group
        {
            "select department, uv from t_1 where (uv >1 and department = 'HR') "
                    + " and sys_imp_date = '2023-09-11' GROUP BY department order by uv desc limit 10",
            "SELECT department, count(DISTINCT uv) FROM t_1 WHERE (count(DISTINCT uv) > 1 AND department = 'HR') AND "
                    + "sys_imp_date = '2023-09-11' GROUP BY department ORDER BY count(DISTINCT uv) DESC LIMIT 10"
        }
    };

    for (String[] scenario : scenarios) {
        String aggregated = SqlParserAddHelper.addAggregateToField(scenario[0], aggregateByField);
        String grouped = SqlParserAddHelper.addGroupBy(aggregated, groupColumns);
        Assert.assertEquals(scenario[1], grouped);
    }

    // aliased aggregate: replaceAlias runs first, then the same two rewriting steps
    String aliasedQuery =
            "select department, count(DISTINCT uv) as uv from t_1 where sys_imp_date = '2023-09-11' GROUP BY "
                    + "department order by uv desc limit 10";
    String withoutAlias = SqlParserReplaceHelper.replaceAlias(aliasedQuery);
    String aliasAggregated = SqlParserAddHelper.addAggregateToField(withoutAlias, aggregateByField);
    String aliasGrouped = SqlParserAddHelper.addGroupBy(aliasAggregated, groupColumns);
    Assert.assertEquals(
            "SELECT department, count(DISTINCT uv) AS uv "
                    + "FROM t_1 WHERE sys_imp_date = '2023-09-11' GROUP BY department "
                    + "ORDER BY count(DISTINCT uv) DESC LIMIT 10",
            aliasGrouped);
}
@Test
void addGroupBy() {
String sql = "select department, sum(pv) from t_1 where sys_imp_date = '2023-09-11' "

View File

@@ -125,7 +125,8 @@ public class QueryReqConverter {
// if there is count() in S2QL,set MetricTable's aggOption to "NATIVE"
String sql = databaseReq.getSql();
if (!SqlParserSelectHelper.hasGroupBy(sql)
|| SqlParserSelectFunctionHelper.hasFunction(sql, "count")) {
|| SqlParserSelectFunctionHelper.hasFunction(sql, "count")
|| SqlParserSelectFunctionHelper.hasFunction(sql, "count_distinct")) {
return AggOption.NATIVE;
}
return AggOption.DEFAULT;