From 0299743c69ac73fc3b4b6e66cd58696de045f0d2 Mon Sep 17 00:00:00 2001
From: lexluo09 <39718951+lexluo09@users.noreply.github.com>
Date: Thu, 10 Oct 2024 11:53:57 +0800
Subject: [PATCH] (improvement)[headless] Remove the as alias containing _
 during the headless stage. (#1767)

---
 .../common/jsqlparser/SqlRemoveHelper.java    | 33 ++++++++++++-
 .../jsqlparser/SqlRemoveHelperTest.java       | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 .../server/utils/QueryReqConverter.java       | 11 ++--
 3 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelper.java b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelper.java
index ba1b5b551..f3fc200b7 100644
--- a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelper.java
+++ b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelper.java
@@ -37,10 +37,41 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Objects;
 import java.util.Set;
+import java.util.regex.Pattern;
 
-/** Sql Parser remove Helper */
+/**
+ * Sql Parser remove Helper
+ */
 @Slf4j
 public class SqlRemoveHelper {
+    /**
+     * Matches a token wrapped in a leading and a trailing underscore, e.g. {@code _alias_}.
+     * Groups 1 and 3 capture the delimiter on each side (whitespace, a comma, or a
+     * zero-width word boundary); group 2 captures the token without the wrapping underscores.
+     */
+    private static final Pattern UNDERSCORE_WRAPPED_TOKEN =
+            Pattern.compile("([\\s,\\t\\n]|\\b)_([^\\s,\\t\\n]+)_([\\s,\\t\\n]|\\b)");
+
+    /**
+     * Strips the wrapping underscores from every {@code _token_} in the given SQL text, so
+     * that {@code SUM(pv) AS _pv_} becomes {@code SUM(pv) AS pv}. Names such as
+     * {@code user_name}, {@code _tmp} or {@code tmp_} are not wrapped and stay unchanged.
+     *
+     * <p>The rewrite is purely textual and not SQL-aware: a quoted literal such as
+     * {@code '_x_'} is rewritten as well.
+     *
+     * @param sql the SQL text to rewrite, may be {@code null}
+     * @return the rewritten SQL, or {@code null} when {@code sql} is {@code null}
+     */
+    public static String removeUnderscores(String sql) {
+        if (sql == null) {
+            return null;
+        }
+        // Group references copy the captured text verbatim. Concatenating the group values
+        // into the replacement string instead would let a '$' or '\' inside a token be parsed
+        // as replacement syntax, which throws or silently drops characters.
+        return UNDERSCORE_WRAPPED_TOKEN.matcher(sql).replaceAll("$1$2$3");
+    }
 
     public static String removeAsteriskAndAddFields(String sql, Set needAddDefaultFields) {
         Select selectStatement = SqlSelectHelper.getSelect(sql);
diff --git a/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelperTest.java
b/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelperTest.java index 4cf5f0d13..72a913002 100644 --- a/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelperTest.java +++ b/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlRemoveHelperTest.java @@ -7,9 +7,57 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -/** SqlParser Remove Helper Test */ +/** + * SqlParser Remove Helper Test + */ class SqlRemoveHelperTest { + @Test + void testRemoveUnderscores() { + String sql = + "WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS _访问次数_ FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name, _访问次数_ FROM 部门访问统计"; + sql = SqlRemoveHelper.removeUnderscores(sql); + Assert.assertEquals(sql, + "WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS 访问次数 FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name, 访问次数 FROM 部门访问统计"); + + sql = "WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS _访问次数_ FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name,_访问次数_ FROM 部门访问统计"; + sql = SqlRemoveHelper.removeUnderscores(sql); + Assert.assertEquals(sql, + "WITH 部门访问统计 AS (SELECT department, user_name, SUM(pv) AS 访问次数 FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT user_name,访问次数 FROM 部门访问统计"); + + sql = "WITH 部门访问统计 AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM 部门访问统计"; + sql = SqlRemoveHelper.removeUnderscores(sql); + 
Assert.assertEquals(sql, + "WITH 部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= " + + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM 部门访问统计"); + + sql = "WITH _部门访问统计 AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM _部门访问统计"; + sql = SqlRemoveHelper.removeUnderscores(sql); + Assert.assertEquals(sql, + "WITH _部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= " + + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM _部门访问统计"); + + sql = "WITH _部门访问统计_ AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM _部门访问统计_"; + sql = SqlRemoveHelper.removeUnderscores(sql); + Assert.assertEquals(sql, + "WITH 部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= " + + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM 部门访问统计"); + + sql = "_部门访问统计_ AS (SELECT department, SUM(pv) AS _访问次数_,user_name FROM 超音数数据集 WHERE sys_imp_date >= '2024-07-12' " + + "AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT _访问次数_,user_name FROM _部门访问统计_"; + sql = SqlRemoveHelper.removeUnderscores(sql); + Assert.assertEquals(sql, + "部门访问统计 AS (SELECT department, SUM(pv) AS 访问次数,user_name FROM 超音数数据集 WHERE sys_imp_date >= " + + "'2024-07-12' AND sys_imp_date <= '2024-10-10' GROUP BY department, user_name HAVING SUM(pv) > 100) SELECT 访问次数,user_name FROM 部门访问统计"); + } + @Test void 
testRemoveAsterisk() { String sql = "select * from 歌曲库"; diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/QueryReqConverter.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/QueryReqConverter.java index acd3e6ac2..f5a65db84 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/QueryReqConverter.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/QueryReqConverter.java @@ -1,5 +1,6 @@ package com.tencent.supersonic.headless.server.utils; +import com.tencent.supersonic.common.jsqlparser.SqlRemoveHelper; import com.tencent.supersonic.common.jsqlparser.SqlReplaceHelper; import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper; import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper; @@ -69,6 +70,8 @@ public class QueryReqConverter { functionNameCorrector(querySQLReq, semanticSchemaResp); // 3.correct tableName correctTableName(querySQLReq); + // 4.remove Underscores + querySQLReq.setSql(SqlRemoveHelper.removeUnderscores(querySQLReq.getSql())); String tableName = SqlSelectHelper.getTableName(querySQLReq.getSql()); if (StringUtils.isEmpty(tableName)) { @@ -78,7 +81,7 @@ public class QueryReqConverter { String reqSql = querySQLReq.getSql(); querySQLReq.setSql(SqlReplaceHelper.replaceAggAliasOrderItem(querySQLReq.getSql())); log.debug("replaceOrderAggSameAlias {} -> {}", reqSql, querySQLReq.getSql()); - // 4.build MetricTables + // 5.build MetricTables List allFields = SqlSelectHelper.getAllSelectFields(querySQLReq.getSql()); List metricSchemas = getMetrics(semanticSchemaResp, allFields); List metrics = @@ -106,7 +109,7 @@ public class QueryReqConverter { metricTable.setAggOption(aggOption); List tables = new ArrayList<>(); tables.add(metricTable); - // 4.build ParseSqlReq + // 6.build ParseSqlReq DataSetQueryParam result = new DataSetQueryParam(); BeanUtils.copyProperties(querySQLReq, result); @@ -117,9 +120,9 @@ 
public class QueryReqConverter { result.setSupportWith(false); result.setWithAlias(false); } - // 5. do deriveMetric + // 7. do deriveMetric generateDerivedMetric(semanticSchemaResp, aggOption, result); - // 6.physicalSql by ParseSqlReq + // 8.physicalSql by ParseSqlReq queryStructReq.setDateInfo(queryStructUtils.getDateConfBySql(querySQLReq.getSql())); queryStructReq.setDataSetId(querySQLReq.getDataSetId());