(improvement)[headless] Strip the leading and trailing `_` from AS aliases (e.g. `_alias_` -> `alias`) during the headless stage. (#1767)

This commit is contained in:
lexluo09
2024-10-10 11:53:57 +08:00
committed by GitHub
parent 182531d48c
commit 0299743c69
3 changed files with 81 additions and 6 deletions

View File

@@ -37,10 +37,34 @@ import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/** Sql Parser remove Helper */
/**
* Sql Parser remove Helper
*/
@Slf4j
public class SqlRemoveHelper {
/**
 * Matches a token that is wrapped in one leading and one trailing underscore, such as
 * {@code _alias_}. Group 1 and group 3 capture whatever delimits the token on each side
 * (a whitespace character, a comma, or an empty match at a word boundary); group 2
 * captures the text between the two underscores.
 */
private static final Pattern pattern =
        Pattern.compile("([\\s,\\t\\n]|\\b)_([^\\s,\\t\\n]+)_([\\s,\\t\\n]|\\b)");

/**
 * Strips the wrapping underscores from every {@code _token_} occurrence in the given SQL
 * text, e.g. {@code "SUM(pv) AS _cnt_"} becomes {@code "SUM(pv) AS cnt"}. Tokens with an
 * underscore on only one side (such as {@code _tmp}) and underscores inside an identifier
 * (such as {@code user_name}) are left untouched.
 *
 * <p>The rewrite is purely textual: the input is not parsed as SQL, so any text matching
 * the pattern is affected regardless of where it appears in the statement.
 *
 * @param sql the SQL text to clean up; may be {@code null}
 * @return the SQL text with wrapping underscores removed, or {@code null} when
 *         {@code sql} is {@code null}
 */
public static String removeUnderscores(String sql) {
    if (sql == null) {
        return null;
    }
    // Group references insert the captured text literally. Building the replacement by
    // concatenating the captured strings would make '$' and '\' inside an alias be
    // re-interpreted as replacement syntax, which either fails or corrupts the output.
    return pattern.matcher(sql).replaceAll("$1$2$3");
}
public static String removeAsteriskAndAddFields(String sql, Set<String> needAddDefaultFields) {
Select selectStatement = SqlSelectHelper.getSelect(sql);

View File

@@ -7,9 +7,57 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** SqlParser Remove Helper Test */
/**
* SqlParser Remove Helper Test
*/
class SqlRemoveHelperTest {
@Test
void testRemoveUnderscores() {
    // Each row is {input SQL, SQL expected after removeUnderscores}.
    String[][] cases = {
        // alias delimited by spaces on both sides
        {"WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS _访问次数_ FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name, _访问次数_ FROM 部门访问统计",
            "WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS 访问次数 FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name, 访问次数 FROM 部门访问统计"},
        // alias directly preceded by a comma
        {"WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS _访问次数_ FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name,_访问次数_ FROM 部门访问统计",
            "WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS 访问次数 FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name,访问次数 FROM 部门访问统计"},
        // alias directly followed by a comma
        {"WITH 部门访问统计 AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM 部门访问统计",
            "WITH 部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= "
                + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM 部门访问统计"},
        // a name with only a leading underscore is expected to stay as it is
        {"WITH _部门访问统计 AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM _部门访问统计",
            "WITH _部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= "
                + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM _部门访问统计"},
        // wrapped name in the WITH clause and at the very end of the statement
        {"WITH _部门访问统计_ AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM _部门访问统计_",
            "WITH 部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= "
                + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM 部门访问统计"},
        // wrapped name at the very start of the text
        {"_部门访问统计_ AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' "
                + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM _部门访问统计_",
            "部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= "
                + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM 部门访问统计"},
    };

    // Cases run in declaration order and stop at the first mismatch.
    for (String[] testCase : cases) {
        String cleaned = SqlRemoveHelper.removeUnderscores(testCase[0]);
        Assert.assertEquals(cleaned, testCase[1]);
    }
}
@Test
void testRemoveAsterisk() {
String sql = "select * from 歌曲库";

View File

@@ -1,5 +1,6 @@
package com.tencent.supersonic.headless.server.utils;
import com.tencent.supersonic.common.jsqlparser.SqlRemoveHelper;
import com.tencent.supersonic.common.jsqlparser.SqlReplaceHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
@@ -69,6 +70,8 @@ public class QueryReqConverter {
functionNameCorrector(querySQLReq, semanticSchemaResp);
// 3.correct tableName
correctTableName(querySQLReq);
// 4.remove Underscores
querySQLReq.setSql(SqlRemoveHelper.removeUnderscores(querySQLReq.getSql()));
String tableName = SqlSelectHelper.getTableName(querySQLReq.getSql());
if (StringUtils.isEmpty(tableName)) {
@@ -78,7 +81,7 @@ public class QueryReqConverter {
String reqSql = querySQLReq.getSql();
querySQLReq.setSql(SqlReplaceHelper.replaceAggAliasOrderItem(querySQLReq.getSql()));
log.debug("replaceOrderAggSameAlias {} -> {}", reqSql, querySQLReq.getSql());
// 4.build MetricTables
// 5.build MetricTables
List<String> allFields = SqlSelectHelper.getAllSelectFields(querySQLReq.getSql());
List<MetricSchemaResp> metricSchemas = getMetrics(semanticSchemaResp, allFields);
List<String> metrics =
@@ -106,7 +109,7 @@ public class QueryReqConverter {
metricTable.setAggOption(aggOption);
List<MetricTable> tables = new ArrayList<>();
tables.add(metricTable);
// 4.build ParseSqlReq
// 6.build ParseSqlReq
DataSetQueryParam result = new DataSetQueryParam();
BeanUtils.copyProperties(querySQLReq, result);
@@ -117,9 +120,9 @@ public class QueryReqConverter {
result.setSupportWith(false);
result.setWithAlias(false);
}
// 5. do deriveMetric
// 7. do deriveMetric
generateDerivedMetric(semanticSchemaResp, aggOption, result);
// 6.physicalSql by ParseSqlReq
// 8.physicalSql by ParseSqlReq
queryStructReq.setDateInfo(queryStructUtils.getDateConfBySql(querySQLReq.getSql()));
queryStructReq.setDataSetId(querySQLReq.getDataSetId());