mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-16 06:56:57 +00:00
[improvement][chat] Add threshold judgment to field replacement (#1850)
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
package com.tencent.supersonic.common.jsqlparser;
|
||||
|
||||
/**
 * String-similarity helpers built on an edit distance in which inserting, deleting or
 * substituting one character, or swapping two adjacent characters, each cost one edit.
 */
public class EditDistanceUtils {

    /**
     * Computes a normalized similarity score between two strings.
     *
     * <p>The score is {@code 1 - editDistance / max(length1, length2)}: {@code 1.0} for equal
     * strings and {@code 0.0} when every character of the longer string has to be edited.
     *
     * @param word1 first string, must not be {@code null}
     * @param word2 second string, must not be {@code null}
     * @return similarity in the range {@code [0.0, 1.0]}; {@code 1.0} when both strings are empty
     */
    public static double getSimilarity(String word1, String word2) {
        final int longest = Math.max(word1.length(), word2.length());
        if (longest == 0) {
            // Two empty strings are identical. Without this guard the division below is
            // 0.0 / 0, which yields NaN and breaks threshold checks and sorting by similarity.
            return 1.0;
        }
        return 1 - (double) editDistance(word1, word2) / longest;
    }

    /**
     * Computes the edit distance between two strings with a dynamic-programming table.
     *
     * <p>{@code dp[i][j]} holds the distance between the first {@code i} characters of
     * {@code word1} and the first {@code j} characters of {@code word2}.
     *
     * @param word1 first string, must not be {@code null}
     * @param word2 second string, must not be {@code null}
     * @return the minimum number of edits needed to turn {@code word1} into {@code word2}
     */
    public static int editDistance(String word1, String word2) {
        final int m = word1.length();
        final int n = word2.length();
        int[][] dp = new int[m + 1][n + 1];

        // Base cases: turning an empty prefix into a prefix of length k takes k insertions,
        // and the reverse takes k deletions.
        for (int j = 0; j <= n; ++j) {
            dp[0][j] = j;
        }
        for (int i = 0; i <= m; ++i) {
            dp[i][0] = i;
        }

        for (int i = 1; i <= m; ++i) {
            char ci = word1.charAt(i - 1);
            for (int j = 1; j <= n; ++j) {
                char cj = word2.charAt(j - 1);
                if (ci == cj) {
                    // Matching characters cost nothing.
                    dp[i][j] = dp[i - 1][j - 1];
                } else if (i > 1 && j > 1 && ci == word2.charAt(j - 2)
                        && cj == word1.charAt(i - 2)) {
                    // The last two characters are swapped ("ab" vs "ba"): one transposition
                    // on top of dp[i - 2][j - 2], or a plain insertion/deletion if cheaper.
                    dp[i][j] = 1 + Math.min(dp[i - 2][j - 2],
                            Math.min(dp[i][j - 1], dp[i - 1][j]));
                } else {
                    // Cheapest of substitution, insertion and deletion.
                    dp[i][j] = Math.min(dp[i - 1][j - 1] + 1,
                            Math.min(dp[i][j - 1] + 1, dp[i - 1][j] + 1));
                }
            }
        }
        return dp[m][n];
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.tencent.supersonic.common.jsqlparser;
|
||||
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sf.jsqlparser.expression.ExpressionVisitorAdapter;
|
||||
import net.sf.jsqlparser.expression.Function;
|
||||
@@ -9,7 +10,6 @@ import java.util.Map;
|
||||
|
||||
@Slf4j
|
||||
public class FieldReplaceVisitor extends ExpressionVisitorAdapter {
|
||||
ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
|
||||
private Map<String, String> fieldNameMap;
|
||||
private ThreadLocal<Boolean> exactReplace = ThreadLocal.withInitial(() -> false);
|
||||
|
||||
@@ -20,7 +20,8 @@ public class FieldReplaceVisitor extends ExpressionVisitorAdapter {
|
||||
|
||||
@Override
|
||||
public void visit(Column column) {
|
||||
parseVisitorHelper.replaceColumn(column, fieldNameMap, exactReplace.get());
|
||||
ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
|
||||
replaceService.replaceColumn(column, fieldNameMap, exactReplace.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.tencent.supersonic.common.jsqlparser;
|
||||
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sf.jsqlparser.expression.DoubleValue;
|
||||
import net.sf.jsqlparser.expression.Expression;
|
||||
@@ -27,7 +28,6 @@ import java.util.Objects;
|
||||
@Slf4j
|
||||
public class FieldValueReplaceVisitor extends ExpressionVisitorAdapter {
|
||||
|
||||
ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
|
||||
private boolean exactReplace;
|
||||
private Map<String, Map<String, String>> filedNameToValueMap;
|
||||
|
||||
@@ -138,7 +138,8 @@ public class FieldValueReplaceVisitor extends ExpressionVisitorAdapter {
|
||||
private String getReplaceValue(Map<String, String> valueMap, String beforeValue) {
|
||||
String afterValue = valueMap.get(String.valueOf(beforeValue));
|
||||
if (StringUtils.isEmpty(afterValue) && !exactReplace) {
|
||||
return parseVisitorHelper.getReplaceValue(beforeValue, valueMap, false);
|
||||
ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
|
||||
return replaceService.getReplaceValue(beforeValue, valueMap, false);
|
||||
}
|
||||
return afterValue;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.tencent.supersonic.common.jsqlparser;
|
||||
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sf.jsqlparser.expression.Expression;
|
||||
import net.sf.jsqlparser.expression.Function;
|
||||
@@ -14,7 +15,6 @@ import java.util.Map;
|
||||
@Slf4j
|
||||
public class GroupByReplaceVisitor implements GroupByVisitor {
|
||||
|
||||
ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
|
||||
private Map<String, String> fieldNameMap;
|
||||
private boolean exactReplace;
|
||||
|
||||
@@ -34,10 +34,11 @@ public class GroupByReplaceVisitor implements GroupByVisitor {
|
||||
}
|
||||
|
||||
private void replaceExpression(Expression expression) {
|
||||
ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
|
||||
if (expression instanceof Column) {
|
||||
parseVisitorHelper.replaceColumn((Column) expression, fieldNameMap, exactReplace);
|
||||
replaceService.replaceColumn((Column) expression, fieldNameMap, exactReplace);
|
||||
} else if (expression instanceof Function) {
|
||||
parseVisitorHelper.replaceFunction((Function) expression, fieldNameMap, exactReplace);
|
||||
replaceService.replaceFunction((Function) expression, fieldNameMap, exactReplace);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.tencent.supersonic.common.jsqlparser;
|
||||
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import net.sf.jsqlparser.expression.Expression;
|
||||
import net.sf.jsqlparser.expression.Function;
|
||||
import net.sf.jsqlparser.schema.Column;
|
||||
@@ -9,8 +10,6 @@ import net.sf.jsqlparser.statement.select.OrderByVisitorAdapter;
|
||||
import java.util.Map;
|
||||
|
||||
public class OrderByReplaceVisitor extends OrderByVisitorAdapter {
|
||||
|
||||
ParseVisitorHelper parseVisitorHelper = new ParseVisitorHelper();
|
||||
private Map<String, String> fieldNameMap;
|
||||
private boolean exactReplace;
|
||||
|
||||
@@ -22,11 +21,12 @@ public class OrderByReplaceVisitor extends OrderByVisitorAdapter {
|
||||
@Override
|
||||
public void visit(OrderByElement orderBy) {
|
||||
Expression expression = orderBy.getExpression();
|
||||
ReplaceService replaceService = ContextUtils.getBean(ReplaceService.class);
|
||||
if (expression instanceof Column) {
|
||||
parseVisitorHelper.replaceColumn((Column) expression, fieldNameMap, exactReplace);
|
||||
replaceService.replaceColumn((Column) expression, fieldNameMap, exactReplace);
|
||||
}
|
||||
if (expression instanceof Function) {
|
||||
parseVisitorHelper.replaceFunction((Function) expression, fieldNameMap, exactReplace);
|
||||
replaceService.replaceFunction((Function) expression, fieldNameMap, exactReplace);
|
||||
}
|
||||
super.visit(orderBy);
|
||||
}
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
package com.tencent.supersonic.common.jsqlparser;
|
||||
|
||||
import com.tencent.supersonic.common.util.StringUtil;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sf.jsqlparser.expression.Expression;
|
||||
import net.sf.jsqlparser.expression.Function;
|
||||
import net.sf.jsqlparser.expression.operators.relational.ExpressionList;
|
||||
import net.sf.jsqlparser.schema.Column;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
@@ -14,7 +17,12 @@ import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
public class ParseVisitorHelper {
|
||||
@Service
|
||||
@Data
|
||||
public class ReplaceService {
|
||||
|
||||
@Value("${s2.replace.column.threshold:0.4}")
|
||||
private double replaceColumnThreshold;
|
||||
|
||||
public void replaceFunction(Function expression, Map<String, String> fieldNameMap,
|
||||
boolean exactReplace) {
|
||||
@@ -38,9 +46,9 @@ public class ParseVisitorHelper {
|
||||
|
||||
public String getReplaceValue(String beforeValue, Map<String, String> valueMap,
|
||||
boolean exactReplace) {
|
||||
String value = valueMap.get(beforeValue);
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
return value;
|
||||
String replaceValue = valueMap.get(beforeValue);
|
||||
if (StringUtils.isNotBlank(replaceValue)) {
|
||||
return replaceValue;
|
||||
}
|
||||
if (exactReplace) {
|
||||
return null;
|
||||
@@ -48,47 +56,18 @@ public class ParseVisitorHelper {
|
||||
Optional<Entry<String, String>> first = valueMap.entrySet().stream().sorted((k1, k2) -> {
|
||||
String k1Value = k1.getKey();
|
||||
String k2Value = k2.getKey();
|
||||
Double k1Similarity = getSimilarity(beforeValue, k1Value);
|
||||
Double k2Similarity = getSimilarity(beforeValue, k2Value);
|
||||
Double k1Similarity = EditDistanceUtils.getSimilarity(beforeValue, k1Value);
|
||||
Double k2Similarity = EditDistanceUtils.getSimilarity(beforeValue, k2Value);
|
||||
return k2Similarity.compareTo(k1Similarity);
|
||||
}).collect(Collectors.toList()).stream().findFirst();
|
||||
|
||||
if (first.isPresent()) {
|
||||
return first.get().getValue();
|
||||
replaceValue = first.get().getValue();
|
||||
double similarity = EditDistanceUtils.getSimilarity(beforeValue, replaceValue);
|
||||
if (similarity > replaceColumnThreshold) {
|
||||
return replaceValue;
|
||||
}
|
||||
}
|
||||
return beforeValue;
|
||||
}
|
||||
|
||||
public static int editDistance(String word1, String word2) {
|
||||
final int m = word1.length();
|
||||
final int n = word2.length();
|
||||
int[][] dp = new int[m + 1][n + 1];
|
||||
for (int j = 0; j <= n; ++j) {
|
||||
dp[0][j] = j;
|
||||
}
|
||||
for (int i = 0; i <= m; ++i) {
|
||||
dp[i][0] = i;
|
||||
}
|
||||
|
||||
for (int i = 1; i <= m; ++i) {
|
||||
char ci = word1.charAt(i - 1);
|
||||
for (int j = 1; j <= n; ++j) {
|
||||
char cj = word2.charAt(j - 1);
|
||||
if (ci == cj) {
|
||||
dp[i][j] = dp[i - 1][j - 1];
|
||||
} else if (i > 1 && j > 1 && ci == word2.charAt(j - 2)
|
||||
&& cj == word1.charAt(i - 2)) {
|
||||
dp[i][j] = 1 + Math.min(dp[i - 2][j - 2], Math.min(dp[i][j - 1], dp[i - 1][j]));
|
||||
} else {
|
||||
dp[i][j] = Math.min(dp[i - 1][j - 1] + 1,
|
||||
Math.min(dp[i][j - 1] + 1, dp[i - 1][j] + 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
return dp[m][n];
|
||||
}
|
||||
|
||||
public double getSimilarity(String word1, String word2) {
|
||||
return 1 - (double) editDistance(word1, word2) / Math.max(word2.length(), word1.length());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user