diff --git a/common/pom.xml b/common/pom.xml
index 27b847cc9..6ade01fd6 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -247,6 +247,12 @@
org.codehaus.woodstox
stax2-api
+
+ org.mockito
+ mockito-inline
+ ${mockito-inline.version}
+ test
+
diff --git a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/EditDistanceUtils.java b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/EditDistanceUtils.java
new file mode 100644
index 000000000..9499db316
--- /dev/null
+++ b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/EditDistanceUtils.java
@@ -0,0 +1,72 @@
+package com.tencent.supersonic.common.jsqlparser;
+
+/**
+ * Edit-distance helpers used to score how closely two strings match.
+ *
+ * <p>All comparisons are made on {@code char} values (UTF-16 code units).
+ */
+public final class EditDistanceUtils {
+
+    private EditDistanceUtils() {
+        // Static helpers only; not meant to be instantiated.
+    }
+
+    /**
+     * Returns a normalized similarity score for two strings.
+     *
+     * <p>The score is {@code 1 - editDistance(word1, word2) / max(length)}: {@code 1.0} for
+     * identical strings and {@code 0.0} when the distance equals the longer length.
+     *
+     * @param word1 first string; must not be {@code null}
+     * @param word2 second string; must not be {@code null}
+     * @return the similarity score; {@code 1.0} when both strings are empty
+     */
+    public static double getSimilarity(String word1, String word2) {
+        final int longest = Math.max(word1.length(), word2.length());
+        if (longest == 0) {
+            // Two empty strings are identical; without this guard 0 / 0 would yield NaN.
+            return 1.0;
+        }
+        return 1 - (double) editDistance(word1, word2) / longest;
+    }
+
+    /**
+     * Computes a Levenshtein-style edit distance between two strings by dynamic programming.
+     * Insertions, deletions and substitutions cost one edit each, and a swap of two adjacent
+     * characters is also considered as a single edit.
+     *
+     * @param word1 first string; must not be {@code null}
+     * @param word2 second string; must not be {@code null}
+     * @return the edit distance; {@code 0} when the strings are equal
+     */
+    public static int editDistance(String word1, String word2) {
+        final int m = word1.length();
+        final int n = word2.length();
+        // dp[i][j] holds the distance between the first i chars of word1 and first j of word2.
+        int[][] dp = new int[m + 1][n + 1];
+        for (int j = 0; j <= n; ++j) {
+            dp[0][j] = j;
+        }
+        for (int i = 0; i <= m; ++i) {
+            dp[i][0] = i;
+        }
+
+        for (int i = 1; i <= m; ++i) {
+            char ci = word1.charAt(i - 1);
+            for (int j = 1; j <= n; ++j) {
+                char cj = word2.charAt(j - 1);
+                if (ci == cj) {
+                    dp[i][j] = dp[i - 1][j - 1];
+                } else if (i > 1 && j > 1 && ci == word2.charAt(j - 2)
+                        && cj == word1.charAt(i - 2)) {
+                    // Both prefixes end in the same two chars in swapped order: try the swap.
+                    dp[i][j] = 1 + Math.min(dp[i - 2][j - 2], Math.min(dp[i][j - 1], dp[i - 1][j]));
+                } else {
+                    dp[i][j] = Math.min(dp[i - 1][j - 1] + 1,
+                            Math.min(dp[i][j - 1] + 1, dp[i - 1][j] + 1));
+                }
+            }
+        }
+        return dp[m][n];
+    }
+}
diff --git a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldReplaceVisitor.java b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldReplaceVisitor.java
index 39aefbe89..87f50cb7e 100644
--- a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldReplaceVisitor.java
+++ b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldReplaceVisitor.java
@@ -1,5 +1,6 @@
package com.tencent.supersonic.common.jsqlparser;
+import com.tencent.supersonic.common.util.ContextUtils;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.expression.ExpressionVisitorAdapter;
import net.sf.jsqlparser.expression.Function;
@@ -9,7 +10,6 @@ import java.util.Map;
@Slf4j
public class FieldReplaceVisitor extends ExpressionVisitorAdapter {
- ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
private Map fieldNameMap;
private ThreadLocal exactReplace = ThreadLocal.withInitial(() -> false);
@@ -20,7 +20,8 @@ public class FieldReplaceVisitor extends ExpressionVisitorAdapter {
@Override
public void visit(Column column) {
- parseVisitorHelper.replaceColumn(column, fieldNameMap, exactReplace.get());
+ ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
+ replaceService.replaceColumn(column, fieldNameMap, exactReplace.get());
}
@Override
diff --git a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldValueReplaceVisitor.java b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldValueReplaceVisitor.java
index ad524cc9a..65b669c88 100644
--- a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldValueReplaceVisitor.java
+++ b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/FieldValueReplaceVisitor.java
@@ -1,5 +1,6 @@
package com.tencent.supersonic.common.jsqlparser;
+import com.tencent.supersonic.common.util.ContextUtils;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.expression.DoubleValue;
import net.sf.jsqlparser.expression.Expression;
@@ -27,7 +28,6 @@ import java.util.Objects;
@Slf4j
public class FieldValueReplaceVisitor extends ExpressionVisitorAdapter {
- ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
private boolean exactReplace;
private Map> filedNameToValueMap;
@@ -138,7 +138,8 @@ public class FieldValueReplaceVisitor extends ExpressionVisitorAdapter {
private String getReplaceValue(Map valueMap, String beforeValue) {
String afterValue = valueMap.get(String.valueOf(beforeValue));
if (StringUtils.isEmpty(afterValue) && !exactReplace) {
- return parseVisitorHelper.getReplaceValue(beforeValue, valueMap, false);
+ ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
+ return replaceService.getReplaceValue(beforeValue, valueMap, false);
}
return afterValue;
}
diff --git a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/GroupByReplaceVisitor.java b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/GroupByReplaceVisitor.java
index 7fc4babb2..b46c846b4 100644
--- a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/GroupByReplaceVisitor.java
+++ b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/GroupByReplaceVisitor.java
@@ -1,5 +1,6 @@
package com.tencent.supersonic.common.jsqlparser;
+import com.tencent.supersonic.common.util.ContextUtils;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
@@ -14,7 +15,6 @@ import java.util.Map;
@Slf4j
public class GroupByReplaceVisitor implements GroupByVisitor {
- ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
private Map fieldNameMap;
private boolean exactReplace;
@@ -34,10 +34,11 @@ public class GroupByReplaceVisitor implements GroupByVisitor {
}
private void replaceExpression(Expression expression) {
+ ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
if (expression instanceof Column) {
- parseVisitorHelper.replaceColumn((Column) expression, fieldNameMap, exactReplace);
+ replaceService.replaceColumn((Column) expression, fieldNameMap, exactReplace);
} else if (expression instanceof Function) {
- parseVisitorHelper.replaceFunction((Function) expression, fieldNameMap, exactReplace);
+ replaceService.replaceFunction((Function) expression, fieldNameMap, exactReplace);
}
}
}
diff --git a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/OrderByReplaceVisitor.java b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/OrderByReplaceVisitor.java
index 0af48f49f..4c0ee2cf0 100644
--- a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/OrderByReplaceVisitor.java
+++ b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/OrderByReplaceVisitor.java
@@ -1,5 +1,6 @@
package com.tencent.supersonic.common.jsqlparser;
+import com.tencent.supersonic.common.util.ContextUtils;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.schema.Column;
@@ -9,8 +10,6 @@ import net.sf.jsqlparser.statement.select.OrderByVisitorAdapter;
import java.util.Map;
public class OrderByReplaceVisitor extends OrderByVisitorAdapter {
-
- ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
private Map fieldNameMap;
private boolean exactReplace;
@@ -22,11 +21,12 @@ public class OrderByReplaceVisitor extends OrderByVisitorAdapter {
@Override
public void visit(OrderByElement orderBy) {
Expression expression = orderBy.getExpression();
+ ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
if (expression instanceof Column) {
- parseVisitorHelper.replaceColumn((Column) expression, fieldNameMap, exactReplace);
+ replaceService.replaceColumn((Column) expression, fieldNameMap, exactReplace);
}
if (expression instanceof Function) {
- parseVisitorHelper.replaceFunction((Function) expression, fieldNameMap, exactReplace);
+ replaceService.replaceFunction((Function) expression, fieldNameMap, exactReplace);
}
super.visit(orderBy);
}
diff --git a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/ParseVisitorHelper.java b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/ReplaceService.java
similarity index 55%
rename from common/src/main/java/com/tencent/supersonic/common/jsqlparser/ParseVisitorHelper.java
rename to common/src/main/java/com/tencent/supersonic/common/jsqlparser/ReplaceService.java
index 2ab3d00eb..0f4957eab 100644
--- a/common/src/main/java/com/tencent/supersonic/common/jsqlparser/ParseVisitorHelper.java
+++ b/common/src/main/java/com/tencent/supersonic/common/jsqlparser/ReplaceService.java
@@ -1,12 +1,15 @@
package com.tencent.supersonic.common.jsqlparser;
import com.tencent.supersonic.common.util.StringUtil;
+import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.expression.operators.relational.ExpressionList;
import net.sf.jsqlparser.schema.Column;
import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
import java.util.Map;
import java.util.Map.Entry;
@@ -14,7 +17,12 @@ import java.util.Optional;
import java.util.stream.Collectors;
@Slf4j
-public class ParseVisitorHelper {
+@Service
+@Data
+public class ReplaceService {
+
+ @Value("${s2.replace.column.threshold:0.4}")
+ private double replaceColumnThreshold;
public void replaceFunction(Function expression, Map fieldNameMap,
boolean exactReplace) {
@@ -38,9 +46,9 @@ public class ParseVisitorHelper {
public String getReplaceValue(String beforeValue, Map valueMap,
boolean exactReplace) {
- String value = valueMap.get(beforeValue);
- if (StringUtils.isNotBlank(value)) {
- return value;
+ String replaceValue = valueMap.get(beforeValue);
+ if (StringUtils.isNotBlank(replaceValue)) {
+ return replaceValue;
}
if (exactReplace) {
return null;
@@ -48,47 +56,18 @@ public class ParseVisitorHelper {
Optional> first = valueMap.entrySet().stream().sorted((k1, k2) -> {
String k1Value = k1.getKey();
String k2Value = k2.getKey();
- Double k1Similarity = getSimilarity(beforeValue, k1Value);
- Double k2Similarity = getSimilarity(beforeValue, k2Value);
+ Double k1Similarity = EditDistanceUtils.getSimilarity(beforeValue, k1Value);
+ Double k2Similarity = EditDistanceUtils.getSimilarity(beforeValue, k2Value);
return k2Similarity.compareTo(k1Similarity);
}).collect(Collectors.toList()).stream().findFirst();
if (first.isPresent()) {
- return first.get().getValue();
+ replaceValue = first.get().getValue();
+ double similarity = EditDistanceUtils.getSimilarity(beforeValue, replaceValue);
+ if (similarity > replaceColumnThreshold) {
+ return replaceValue;
+ }
}
return beforeValue;
}
-
- public static int editDistance(String word1, String word2) {
- final int m = word1.length();
- final int n = word2.length();
- int[][] dp = new int[m + 1][n + 1];
- for (int j = 0; j <= n; ++j) {
- dp[0][j] = j;
- }
- for (int i = 0; i <= m; ++i) {
- dp[i][0] = i;
- }
-
- for (int i = 1; i <= m; ++i) {
- char ci = word1.charAt(i - 1);
- for (int j = 1; j <= n; ++j) {
- char cj = word2.charAt(j - 1);
- if (ci == cj) {
- dp[i][j] = dp[i - 1][j - 1];
- } else if (i > 1 && j > 1 && ci == word2.charAt(j - 2)
- && cj == word1.charAt(i - 2)) {
- dp[i][j] = 1 + Math.min(dp[i - 2][j - 2], Math.min(dp[i][j - 1], dp[i - 1][j]));
- } else {
- dp[i][j] = Math.min(dp[i - 1][j - 1] + 1,
- Math.min(dp[i][j - 1] + 1, dp[i - 1][j] + 1));
- }
- }
- }
- return dp[m][n];
- }
-
- public double getSimilarity(String word1, String word2) {
- return 1 - (double) editDistance(word1, word2) / Math.max(word2.length(), word1.length());
- }
}
diff --git a/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceFieldsTest.java b/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceFieldsTest.java
index b04d8586e..4585ede1e 100644
--- a/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceFieldsTest.java
+++ b/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceFieldsTest.java
@@ -18,9 +18,9 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFields(replaceSql, fieldToBizName);
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
Assert.assertEquals(
- "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-08-08' AND publish_date <= '2023-08-09')"
- + " AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' AND song_publis_date = '2023-08-01'"
- + " ORDER BY play_count DESC LIMIT 11",
+ "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-08-08' AND publish_date "
+ + "<= '2023-08-09') AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' AND "
+ + "歌曲发布时 = '2023-08-01' ORDER BY 播放量 DESC LIMIT 11",
replaceSql);
}
@@ -77,9 +77,9 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFields(replaceSql, fieldToBizName);
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
- Assert.assertEquals("SELECT YEAR(发行日期), count(song_name) FROM 歌曲库 "
- + "WHERE YEAR(发行日期) IN (2022, 2023) AND sys_imp_date = '2023-08-14' "
- + "GROUP BY YEAR(publish_date)", replaceSql);
+ Assert.assertEquals("SELECT YEAR(发行日期), count(song_name) FROM 歌曲库 WHERE "
+ + "YEAR(发行日期) IN (2022, 2023) AND sys_imp_date = '2023-08-14' GROUP BY YEAR(发行日期)",
+ replaceSql);
}
@Test
@@ -91,9 +91,10 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFields(replaceSql, fieldToBizName);
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
- Assert.assertEquals("SELECT YEAR(发行日期), count(song_name) FROM 歌曲库 "
- + "WHERE YEAR(发行日期) IN (2022, 2023) AND sys_imp_date = '2023-08-14'"
- + " GROUP BY publish_date", replaceSql);
+ Assert.assertEquals(
+ "SELECT YEAR(发行日期), count(song_name) FROM 歌曲库 WHERE YEAR(发行日期) "
+ + "IN (2022, 2023) AND sys_imp_date = '2023-08-14' GROUP BY 发行日期",
+ replaceSql);
}
@@ -107,9 +108,8 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
Assert.assertEquals(
- "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2022-08-11' "
- + "AND publish_date <= '2023-08-11') AND play_count > 1000000 AND "
- + "(sys_imp_date >= '2023-07-12' AND sys_imp_date <= '2023-08-11')",
+ "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2022-08-11' AND publish_date <= '2023-08-11')"
+ + " AND 结算播放量 > 1000000 AND (sys_imp_date >= '2023-07-12' AND sys_imp_date <= '2023-08-11')",
replaceSql);
}
@@ -123,8 +123,9 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
Assert.assertEquals(
- "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-08-08' AND publish_date <= '2023-08-09')"
- + " AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' ORDER BY play_count DESC LIMIT 11",
+ "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-08-08' AND publish_date "
+ + "<= '2023-08-09') AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' ORDER BY "
+ + "播放量 DESC LIMIT 11",
replaceSql);
}
@@ -138,8 +139,9 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
Assert.assertEquals(
- "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-01-01' AND publish_date <= '2023-08-09')"
- + " AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' ORDER BY play_count DESC LIMIT 11",
+ "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-01-01' AND publish_date "
+ + "<= '2023-08-09') AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' "
+ + "ORDER BY 播放量 DESC LIMIT 11",
replaceSql);
}
@@ -153,8 +155,9 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
Assert.assertEquals(
- "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-02-09' AND publish_date <= '2023-08-09')"
- + " AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' ORDER BY play_count DESC LIMIT 11",
+ "SELECT song_name FROM 歌曲库 WHERE (publish_date >= '2023-02-09' AND publish_date <="
+ + " '2023-08-09') AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' "
+ + "ORDER BY 播放量 DESC LIMIT 11",
replaceSql);
}
@@ -167,9 +170,9 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
fieldToBizName);
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
replaceSql = SqlRemoveHelper.removeNumberFilter(replaceSql);
- Assert.assertEquals("SELECT song_name FROM 歌曲库 WHERE publish_date <= '2023-02-09' AND"
- + " singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09'"
- + " ORDER BY play_count DESC LIMIT 11", replaceSql);
+ Assert.assertEquals("SELECT song_name FROM 歌曲库 WHERE publish_date <= '2023-02-09' "
+ + "AND singer_name = '邓紫棋' AND sys_imp_date = '2023-08-09' ORDER BY 播放量 DESC LIMIT 11",
+ replaceSql);
}
@Test
@@ -222,9 +225,8 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
Assert.assertEquals(
- "SELECT song_name, sum(评分) FROM CSpider WHERE (1 < 2) AND "
- + "sys_imp_date = '2023-10-15' GROUP BY song_name HAVING "
- + "sum(评分) < (SELECT min(评分) FROM CSpider WHERE user_id = '英文')",
+ "SELECT 歌曲名称, sum(评分) FROM CSpider WHERE (1 < 2) AND sys_imp_date = '2023-10-15' "
+ + "GROUP BY 歌曲名称 HAVING sum(评分) < (SELECT min(评分) FROM CSpider WHERE 语种 = '英文')",
replaceSql);
}
@@ -239,9 +241,9 @@ class SqlReplaceFieldsTest extends SqlReplaceHelperTest {
replaceSql = SqlReplaceHelper.replaceFunction(replaceSql);
Assert.assertEquals(
- "SELECT sum(评分) / (SELECT sum(评分) FROM CSpider WHERE sys_imp_date = '2023-10-15') "
- + "FROM CSpider WHERE sys_imp_date = '2023-10-15' GROUP BY song_name HAVING "
- + "sum(评分) < (SELECT min(评分) FROM CSpider WHERE user_id = '英文')",
+ "SELECT sum(评分) / (SELECT sum(评分) FROM CSpider WHERE sys_imp_date = '2023-10-15') FROM "
+ + "CSpider WHERE sys_imp_date = '2023-10-15' GROUP BY 歌曲名称 HAVING sum(评分) < (SELECT min(评分) "
+ + "FROM CSpider WHERE 语种 = '英文')",
replaceSql);
}
diff --git a/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceHelperTest.java b/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceHelperTest.java
index 0175e90ed..c3a579558 100644
--- a/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceHelperTest.java
+++ b/common/src/test/java/com/tencent/supersonic/common/jsqlparser/SqlReplaceHelperTest.java
@@ -1,9 +1,13 @@
package com.tencent.supersonic.common.jsqlparser;
import com.tencent.supersonic.common.pojo.enums.AggOperatorEnum;
+import com.tencent.supersonic.common.util.ContextUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.junit.Assert;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
+import org.mockito.MockedStatic;
import java.util.Collections;
import java.util.HashMap;
@@ -11,10 +15,25 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import static org.mockito.Mockito.mockStatic;
+
/**
* SqlParserReplaceHelperTest
*/
class SqlReplaceHelperTest {
+ private MockedStatic mockedContextUtils;
+
+ @BeforeEach
+ public void setUp() {
+     // Threshold 0.0: the service accepts any fuzzy candidate whose similarity is above zero.
+     ReplaceService stubService = new ReplaceService();
+     stubService.setReplaceColumnThreshold(0.0);
+     // Visitors look the service up via ContextUtils.getBean; stub that static call.
+     mockedContextUtils = mockStatic(ContextUtils.class);
+     mockedContextUtils.when(() -> ContextUtils.getBean(ReplaceService.class))
+             .thenReturn(stubService);
+ }
+
@Test
void testReplaceAggField() {
String sql = "SELECT 维度1,sum(播放量) FROM 数据库 "
@@ -334,4 +353,12 @@ class SqlReplaceHelperTest {
fieldToBizName.put("访问次数", "pv");
return fieldToBizName;
}
+
+ @AfterEach
+ public void tearDown() {
+     if (mockedContextUtils == null) {
+         return; // nothing to release, e.g. setUp failed before creating the mock
+     }
+     mockedContextUtils.close(); // unregister the static mock before the next test
+ }
}