mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 19:51:00 +00:00
(improvement)(chat) Treat S2SQL that contains aggregate functions as the metric query type, add the missing dimensions to the GROUP BY clause for COUNT DISTINCT queries, and increase the query timeout in H2 (#1700)
This commit is contained in:
@@ -42,11 +42,6 @@ public class GroupByCorrector extends BaseSemanticCorrector {
|
||||
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
|
||||
String correctS2SQL = sqlInfo.getCorrectedS2SQL();
|
||||
SemanticSchema semanticSchema = chatQueryContext.getSemanticSchema();
|
||||
// check has distinct
|
||||
if (SqlSelectHelper.hasDistinct(correctS2SQL)) {
|
||||
log.debug("no need to add groupby ,existed distinct in s2sql:{}", correctS2SQL);
|
||||
return false;
|
||||
}
|
||||
// add alias field name
|
||||
Set<String> dimensions = getDimensions(dataSetId, semanticSchema);
|
||||
List<String> selectFields = SqlSelectHelper.getSelectFields(correctS2SQL);
|
||||
@@ -54,11 +49,11 @@ public class GroupByCorrector extends BaseSemanticCorrector {
|
||||
return false;
|
||||
}
|
||||
// if only date in select not add group by.
|
||||
if (selectFields.size() == 1 && selectFields.contains(TimeDimensionEnum.DAY.getChName())) {
|
||||
if (selectFields.size() == 1 && TimeDimensionEnum.containsZhTimeDimension(selectFields)) {
|
||||
return false;
|
||||
}
|
||||
if (SqlSelectHelper.hasGroupBy(correctS2SQL)) {
|
||||
log.debug("No need to add groupby, existed groupby in s2sql:{}", correctS2SQL);
|
||||
log.debug("No need to add 'group by', existed 'group by' in s2sql:{}", correctS2SQL);
|
||||
return false;
|
||||
}
|
||||
Environment environment = ContextUtils.getBean(Environment.class);
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.tencent.supersonic.headless.chat.parser;
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper;
|
||||
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
|
||||
import com.tencent.supersonic.common.pojo.enums.QueryType;
|
||||
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
|
||||
@@ -70,7 +71,8 @@ public class QueryTypeParser implements SemanticParser {
|
||||
}
|
||||
|
||||
// 2. metric queryType
|
||||
if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema)) {
|
||||
if (selectContainsMetric(sqlInfo, dataSetId, semanticSchema)
|
||||
|| SqlSelectFunctionHelper.hasAggregateFunction(sqlInfo.getParsedS2SQL())) {
|
||||
return QueryType.METRIC;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
spring:
|
||||
datasource:
|
||||
driver-class-name: org.h2.Driver
|
||||
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false
|
||||
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30
|
||||
username: root
|
||||
password: semantic
|
||||
sql:
|
||||
|
||||
@@ -39,7 +39,7 @@
|
||||
"question": "过去半个月核心用户的访问次数",
|
||||
"sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]",
|
||||
"dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
|
||||
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'"
|
||||
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户"
|
||||
},
|
||||
{
|
||||
"question": "过去半个月忠实用户有哪一些",
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
spring:
|
||||
datasource:
|
||||
driver-class-name: org.h2.Driver
|
||||
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false
|
||||
url: jdbc:h2:mem:semantic;DATABASE_TO_UPPER=false;QUERY_TIMEOUT=30
|
||||
username: root
|
||||
password: semantic
|
||||
sql:
|
||||
|
||||
@@ -39,7 +39,7 @@
|
||||
"question": "过去半个月核心用户的访问次数",
|
||||
"sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]",
|
||||
"dbSchema": "DatabaseType=[h2], Table=[超音数产品], PartitionTimeField=[数据日期 FORMAT 'yyyy-MM-dd'], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
|
||||
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'"
|
||||
"sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户"
|
||||
},
|
||||
{
|
||||
"question": "过去半个月忠实用户有哪一些",
|
||||
|
||||
Reference in New Issue
Block a user