[improvement](chat) Remove duplicates among the multiple SQL statements identified by the LLM. (#391)

This commit is contained in:
lexluo09
2023-11-16 09:59:54 +08:00
committed by GitHub
parent 8688c8c2b3
commit 5b3a9ffba8
16 changed files with 348 additions and 128 deletions

View File

@@ -28,38 +28,38 @@ import org.apache.commons.collections.CollectionUtils;
public class FieldAndValueAcquireVisitor extends ExpressionVisitorAdapter {
private Set<FilterExpression> filterExpressions;
private Set<FieldExpression> fieldExpressions;
/**
 * Creates a visitor that adds every expression it extracts to the supplied set.
 * The set is stored by reference (not copied), so the caller reads the results from it.
 *
 * @param fieldExpressions the set that receives the extracted expressions
 */
public FieldAndValueAcquireVisitor(Set<FilterExpression> filterExpressions) {
this.filterExpressions = filterExpressions;
public FieldAndValueAcquireVisitor(Set<FieldExpression> fieldExpressions) {
this.fieldExpressions = fieldExpressions;
}
/**
 * Records a LIKE predicate as a field expression and adds it to the result set.
 * The value is obtained via getFieldValue on the right operand and the operator is the
 * expression's string form.
 *
 * @param expr the LIKE expression being visited
 */
public void visit(LikeExpression expr) {
Expression leftExpression = expr.getLeftExpression();
Expression rightExpression = expr.getRightExpression();
FilterExpression filterExpression = new FilterExpression();
FieldExpression fieldExpression = new FieldExpression();
String columnName = null;
// Only a plain column on the left contributes a field name; otherwise it stays unset.
if (leftExpression instanceof Column) {
Column column = (Column) leftExpression;
columnName = column.getColumnName();
filterExpression.setFieldName(columnName);
fieldExpression.setFieldName(columnName);
}
filterExpression.setFieldValue(getFieldValue(rightExpression));
filterExpression.setOperator(expr.getStringExpression());
filterExpressions.add(filterExpression);
// NOTE(review): the expression is added even when no field name was set above.
fieldExpression.setFieldValue(getFieldValue(rightExpression));
fieldExpression.setOperator(expr.getStringExpression());
fieldExpressions.add(fieldExpression);
}
public void visit(InExpression expr) {
FilterExpression filterExpression = new FilterExpression();
FieldExpression fieldExpression = new FieldExpression();
Expression leftExpression = expr.getLeftExpression();
if (!(leftExpression instanceof Column)) {
return;
}
filterExpression.setFieldName(((Column) leftExpression).getColumnName());
filterExpression.setOperator(JsqlConstants.IN);
fieldExpression.setFieldName(((Column) leftExpression).getColumnName());
fieldExpression.setOperator(JsqlConstants.IN);
ItemsList rightItemsList = expr.getRightItemsList();
filterExpression.setFieldValue(rightItemsList);
fieldExpression.setFieldValue(rightItemsList);
List<Object> result = new ArrayList<>();
if (rightItemsList instanceof ExpressionList) {
ExpressionList rightExpressionList = (ExpressionList) rightItemsList;
@@ -70,78 +70,78 @@ public class FieldAndValueAcquireVisitor extends ExpressionVisitorAdapter {
}
}
}
filterExpression.setFieldValue(result);
filterExpressions.add(filterExpression);
fieldExpression.setFieldValue(result);
fieldExpressions.add(fieldExpression);
}
/**
 * Records a less-than comparison: converts it with getFilterExpression and adds the result
 * to the result set.
 */
@Override
public void visit(MinorThan expr) {
FilterExpression filterExpression = getFilterExpression(expr);
filterExpressions.add(filterExpression);
FieldExpression fieldExpression = getFilterExpression(expr);
fieldExpressions.add(fieldExpression);
}
/**
 * Records an equality comparison: converts it with getFilterExpression and adds the result
 * to the result set.
 */
@Override
public void visit(EqualsTo expr) {
FilterExpression filterExpression = getFilterExpression(expr);
filterExpressions.add(filterExpression);
FieldExpression fieldExpression = getFilterExpression(expr);
fieldExpressions.add(fieldExpression);
}
/**
 * Records a less-than-or-equal comparison: converts it with getFilterExpression and adds the
 * result to the result set.
 */
@Override
public void visit(MinorThanEquals expr) {
FilterExpression filterExpression = getFilterExpression(expr);
filterExpressions.add(filterExpression);
FieldExpression fieldExpression = getFilterExpression(expr);
fieldExpressions.add(fieldExpression);
}
/**
 * Records a greater-than comparison: converts it with getFilterExpression and adds the result
 * to the result set.
 */
@Override
public void visit(GreaterThan expr) {
FilterExpression filterExpression = getFilterExpression(expr);
filterExpressions.add(filterExpression);
FieldExpression fieldExpression = getFilterExpression(expr);
fieldExpressions.add(fieldExpression);
}
/**
 * Records a greater-than-or-equal comparison: converts it with getFilterExpression and adds
 * the result to the result set.
 */
@Override
public void visit(GreaterThanEquals expr) {
FilterExpression filterExpression = getFilterExpression(expr);
filterExpressions.add(filterExpression);
FieldExpression fieldExpression = getFilterExpression(expr);
fieldExpressions.add(fieldExpression);
}
/**
 * Builds a field expression from a binary comparison.
 *
 * <p>Three shapes of left operand are handled:
 * a plain column (field name, value and operator are set at the end of the method);
 * a function call (field name comes from getColumn, the upper-cased function name is stored,
 * and the value may get a date-period suffix);
 * anything else (only value and operator are set, the field name stays unset).
 *
 * @param expr the comparison to convert
 * @return the populated expression; never null, but possibly only partially filled in
 */
private FilterExpression getFilterExpression(ComparisonOperator expr) {
private FieldExpression getFilterExpression(ComparisonOperator expr) {
Expression leftExpression = expr.getLeftExpression();
Expression rightExpression = expr.getRightExpression();
FilterExpression filterExpression = new FilterExpression();
FieldExpression fieldExpression = new FieldExpression();
String columnName = null;
if (leftExpression instanceof Column) {
Column column = (Column) leftExpression;
columnName = column.getColumnName();
filterExpression.setFieldName(columnName);
fieldExpression.setFieldName(columnName);
}
if (leftExpression instanceof Function) {
Function leftExpressionFunction = (Function) leftExpression;
Column field = getColumn(leftExpressionFunction);
// NOTE(review): when no column is found, an expression with nothing set is returned,
// and the visit methods in this class still add it to the result set.
if (Objects.isNull(field)) {
return filterExpression;
return fieldExpression;
}
// NOTE(review): toUpperCase() uses the default locale; consider Locale.ROOT.
String functionName = leftExpressionFunction.getName().toUpperCase();
filterExpression.setFieldName(field.getColumnName());
filterExpression.setFunction(functionName);
filterExpression.setOperator(expr.getStringExpression());
fieldExpression.setFieldName(field.getColumnName());
fieldExpression.setFunction(functionName);
fieldExpression.setOperator(expr.getStringExpression());
//deal with DAY/WEEK function
// The function name is looked up as a date period; on a match the period's getChName()
// is appended to the string form of the right-hand value.
// NOTE(review): the contains() check looks redundant if DatePeriodEnum.get only returns
// members of DatePeriodEnum - confirm.
List<DatePeriodEnum> collect = Arrays.stream(DatePeriodEnum.values()).collect(Collectors.toList());
DatePeriodEnum periodEnum = DatePeriodEnum.get(functionName);
if (Objects.nonNull(periodEnum) && collect.contains(periodEnum)) {
filterExpression.setFieldValue(getFieldValue(rightExpression) + periodEnum.getChName());
return filterExpression;
fieldExpression.setFieldValue(getFieldValue(rightExpression) + periodEnum.getChName());
return fieldExpression;
} else {
//deal with aggregate function
// Any function that is not a date period keeps the right-hand value unchanged.
filterExpression.setFieldValue(getFieldValue(rightExpression));
return filterExpression;
fieldExpression.setFieldValue(getFieldValue(rightExpression));
return fieldExpression;
}
}
// Reached for column and non-column/non-function left operands.
filterExpression.setFieldValue(getFieldValue(rightExpression));
filterExpression.setOperator(expr.getStringExpression());
return filterExpression;
fieldExpression.setFieldValue(getFieldValue(rightExpression));
fieldExpression.setOperator(expr.getStringExpression());
return fieldExpression;
}
private Column getColumn(Function leftExpressionFunction) {

View File

@@ -3,7 +3,7 @@ package com.tencent.supersonic.common.util.jsqlparser;
import lombok.Data;
@Data
public class FilterExpression {
public class FieldExpression {
private String operator;

View File

@@ -1,5 +1,6 @@
package com.tencent.supersonic.common.util.jsqlparser;
import com.tencent.supersonic.common.pojo.Constants;
import java.util.List;
import java.util.Set;
import net.sf.jsqlparser.expression.Expression;
@@ -10,27 +11,34 @@ import net.sf.jsqlparser.statement.select.OrderByVisitorAdapter;
/**
 * ORDER BY visitor that turns each visited sort key into a FieldExpression (field name plus
 * sort direction stored as the operator) and adds it to a caller-supplied set.
 */
public class OrderByAcquireVisitor extends OrderByVisitorAdapter {
private Set<String> fields;
// Result set shared with the caller; stored by reference, not copied.
private Set<FieldExpression> fields;
public OrderByAcquireVisitor(Set<String> fields) {
public OrderByAcquireVisitor(Set<FieldExpression> fields) {
this.fields = fields;
}
/**
 * Extracts the column name and sort direction of one ORDER BY element, adds the resulting
 * expression to the result set, then delegates to the adapter's visit.
 *
 * @param orderBy the ORDER BY element being visited
 */
@Override
public void visit(OrderByElement orderBy) {
Expression expression = orderBy.getExpression();
FieldExpression fieldExpression = new FieldExpression();
if (expression instanceof Column) {
fields.add(((Column) expression).getColumnName());
fieldExpression.setFieldName(((Column) expression).getColumnName());
}
if (expression instanceof Function) {
Function function = (Function) expression;
// NOTE(review): getParameters() may be null for a function without an argument list,
// which would throw here - confirm against the JSqlParser version in use.
List<Expression> expressions = function.getParameters().getExpressions();
for (Expression column : expressions) {
if (column instanceof Column) {
fields.add(((Column) column).getColumnName());
// NOTE(review): each column argument overwrites the previous field name, so only the
// last column of a multi-column function call is kept.
fieldExpression.setFieldName(((Column) column).getColumnName());
}
}
}
// The sort direction is stored in the operator slot: ASC_UPPER unless the element is
// not ascending.
String operator = Constants.ASC_UPPER;
if (!orderBy.isAsc()) {
operator = Constants.DESC_UPPER;
}
fieldExpression.setOperator(operator);
// NOTE(review): the expression is added even when no column was found, in which case
// its field name has never been set.
fields.add(fieldExpression);
super.visit(orderBy);
}
}

View File

@@ -0,0 +1,16 @@
package com.tencent.supersonic.common.util.jsqlparser;
import lombok.Data;
/**
 * Data holder with four properties: operator, field name, field value and function.
 * Accessors, equals/hashCode and toString are generated by Lombok's {@code @Data}.
 *
 * <p>NOTE(review): the property names match the setters the visitors call on FieldExpression
 * (setOperator, setFieldName, setFieldValue, setFunction), and nothing in the visible changes
 * references this class - confirm whether it is still needed.
 */
@Data
public class OrderByExpression {
// Presumably the sort direction or comparison operator - confirm against intended callers.
private String operator;
// Name of the column the expression refers to.
private String fieldName;
// Value associated with the field; typed as Object, so no particular type is enforced.
private Object fieldValue;
// Name of a function applied to the field, if any.
private String function;
}

View File

@@ -0,0 +1,67 @@
package com.tencent.supersonic.common.util.jsqlparser;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
/**
 * Helper that decides whether two SQL statements are equivalent.
 *
 * <p>Equivalence is structural rather than textual: each statement is broken into parts via
 * {@link SqlParserSelectHelper} and the parts are compared one by one.
 */
@Slf4j
public class SqlParserEqualHelper {

    /**
     * Determines whether two SQL statements are equal.
     *
     * <p>The following parts are compared in turn, stopping at the first mismatch: select fields,
     * all fields, filter expressions, table name (ignoring case), HAVING expressions and ORDER BY
     * expressions. Collections are compared with {@link CollectionUtils#isEqualCollection}, i.e. by
     * element cardinality rather than position, so reordered select items or predicates still
     * count as equal. The same holds for ORDER BY keys: their sequence is not taken into account.
     *
     * @param thisSql the first SQL statement
     * @param otherSql the second SQL statement
     * @return {@code true} when every compared part matches, {@code false} otherwise
     */
    public static boolean equals(String thisSql, String otherSql) {
        //1. select fields
        List<String> thisSelectFields = SqlParserSelectHelper.getSelectFields(thisSql);
        List<String> otherSelectFields = SqlParserSelectHelper.getSelectFields(otherSql);
        if (!CollectionUtils.isEqualCollection(thisSelectFields, otherSelectFields)) {
            return false;
        }
        //2. all fields
        List<String> thisAllFields = SqlParserSelectHelper.getAllFields(thisSql);
        List<String> otherAllFields = SqlParserSelectHelper.getAllFields(otherSql);
        if (!CollectionUtils.isEqualCollection(thisAllFields, otherAllFields)) {
            return false;
        }
        //3. where
        List<FieldExpression> thisFieldExpressions = SqlParserSelectHelper.getFilterExpression(thisSql);
        List<FieldExpression> otherFieldExpressions = SqlParserSelectHelper.getFilterExpression(otherSql);
        if (!CollectionUtils.isEqualCollection(thisFieldExpressions, otherFieldExpressions)) {
            return false;
        }
        //4. tableName
        if (!sameTableName(SqlParserSelectHelper.getDbTableName(thisSql),
                SqlParserSelectHelper.getDbTableName(otherSql))) {
            return false;
        }
        //5. having
        List<FieldExpression> thisHavingExpressions = SqlParserSelectHelper.getHavingExpressions(thisSql);
        List<FieldExpression> otherHavingExpressions = SqlParserSelectHelper.getHavingExpressions(otherSql);
        if (!CollectionUtils.isEqualCollection(thisHavingExpressions, otherHavingExpressions)) {
            return false;
        }
        //6. orderBy
        List<FieldExpression> thisOrderByExpressions = SqlParserSelectHelper.getOrderByExpressions(thisSql);
        List<FieldExpression> otherOrderByExpressions = SqlParserSelectHelper.getOrderByExpressions(otherSql);
        return CollectionUtils.isEqualCollection(thisOrderByExpressions, otherOrderByExpressions);
    }

    /**
     * Compares two table names ignoring case, treating two missing names as equal.
     *
     * <p>Guards against a {@code null} table name, which would otherwise raise a
     * NullPointerException when used as the receiver of {@code equalsIgnoreCase}.
     */
    private static boolean sameTableName(String thisTable, String otherTable) {
        if (thisTable == null) {
            return otherTable == null;
        }
        // equalsIgnoreCase already returns false for a null argument.
        return thisTable.equalsIgnoreCase(otherTable);
    }
}

View File

@@ -5,6 +5,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.JSQLParserException;
import net.sf.jsqlparser.expression.Expression;
@@ -40,12 +41,12 @@ import org.springframework.util.CollectionUtils;
@Slf4j
public class SqlParserSelectHelper {
public static List<FilterExpression> getFilterExpression(String sql) {
public static List<FieldExpression> getFilterExpression(String sql) {
PlainSelect plainSelect = getPlainSelect(sql);
if (Objects.isNull(plainSelect)) {
return new ArrayList<>();
}
Set<FilterExpression> result = new HashSet<>();
Set<FieldExpression> result = new HashSet<>();
Expression where = plainSelect.getWhere();
if (Objects.nonNull(where)) {
where.accept(new FieldAndValueAcquireVisitor(result));
@@ -208,12 +209,12 @@ public class SqlParserSelectHelper {
return null;
}
public static List<FilterExpression> getWhereExpressions(String sql) {
public static List<FieldExpression> getWhereExpressions(String sql) {
PlainSelect plainSelect = getPlainSelect(sql);
if (Objects.isNull(plainSelect)) {
return new ArrayList<>();
}
Set<FilterExpression> result = new HashSet<>();
Set<FieldExpression> result = new HashSet<>();
Expression where = plainSelect.getWhere();
if (Objects.nonNull(where)) {
where.accept(new FieldAndValueAcquireVisitor(result));
@@ -221,12 +222,12 @@ public class SqlParserSelectHelper {
return new ArrayList<>(result);
}
public static List<FilterExpression> getHavingExpressions(String sql) {
public static List<FieldExpression> getHavingExpressions(String sql) {
PlainSelect plainSelect = getPlainSelect(sql);
if (Objects.isNull(plainSelect)) {
return new ArrayList<>();
}
Set<FilterExpression> result = new HashSet<>();
Set<FieldExpression> result = new HashSet<>();
Expression having = plainSelect.getHaving();
if (Objects.nonNull(having)) {
having.accept(new FieldAndValueAcquireVisitor(result));
@@ -244,13 +245,31 @@ public class SqlParserSelectHelper {
return new ArrayList<>(result);
}
private static void getOrderByFields(PlainSelect plainSelect, Set<String> result) {
/**
 * Collects the field expressions produced for the ORDER BY clause of a SELECT.
 *
 * <p>Each ORDER BY element is handed to a fresh {@link OrderByAcquireVisitor} that writes into
 * the returned set. The set is a {@link HashSet}, so equal expressions collapse into one and
 * the order of the sort keys is not preserved.
 *
 * @param plainSelect the parsed SELECT to inspect
 * @return a new mutable set of expressions; empty when there is no ORDER BY clause
 */
private static Set<FieldExpression> getOrderByFields(PlainSelect plainSelect) {
    Set<FieldExpression> orderByFields = new HashSet<>();
    List<OrderByElement> elements = plainSelect.getOrderByElements();
    if (CollectionUtils.isEmpty(elements)) {
        return orderByFields;
    }
    elements.forEach(element -> element.accept(new OrderByAcquireVisitor(orderByFields)));
    return orderByFields;
}
/**
 * Adds the column names referenced by the ORDER BY clause to {@code result}.
 *
 * <p>Adapter over {@link #getOrderByFields(PlainSelect)} for callers that only need names.
 * A sort key for which the visitor found no column carries a {@code null} field name; such
 * entries are skipped so that {@code result} only ever receives real column names.
 *
 * @param plainSelect the parsed SELECT whose ORDER BY clause is inspected
 * @param result the set that receives the column names; modified in place
 */
private static void getOrderByFields(PlainSelect plainSelect, Set<String> result) {
    Set<String> fieldNames = getOrderByFields(plainSelect).stream()
            .map(FieldExpression::getFieldName)
            .filter(Objects::nonNull)
            .collect(Collectors.toSet());
    result.addAll(fieldNames);
}
/**
 * Returns the ORDER BY expressions of the given SQL statement.
 *
 * @param sql the SQL text to parse
 * @return a new mutable list of expressions; empty when {@code getPlainSelect} yields
 *         {@code null} or the statement has no ORDER BY clause
 */
public static List<FieldExpression> getOrderByExpressions(String sql) {
    List<FieldExpression> orderByExpressions = new ArrayList<>();
    PlainSelect select = getPlainSelect(sql);
    if (select != null) {
        orderByExpressions.addAll(getOrderByFields(select));
    }
    return orderByExpressions;
}
public static List<String> getGroupByFields(String sql) {

View File

@@ -77,8 +77,7 @@ class DateUtilsTest {
String startDate = "2023-07-01";
String endDate = "2023-10-01";
List<String> actualDateList = DateUtils.getDateList(startDate, endDate, Constants.MONTH);
List<String> expectedDateList = Lists.newArrayList("2023-07-01", "2023-08-01",
"2023-09-01", "2023-10-01");
List<String> expectedDateList = Lists.newArrayList("2023-07", "2023-08", "2023-09", "2023-10");
Assertions.assertEquals(actualDateList, expectedDateList);
}
}

View File

@@ -0,0 +1,41 @@
package com.tencent.supersonic.common.util.jsqlparser;
import cn.hutool.core.lang.Assert;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link SqlParserEqualHelper}.
 *
 * @author lex luo
 * @date 2023/11/15 15:04
 */
class SqlParserEqualHelperTest {

    @Test
    void testEquals() {
        // Same predicates in a different order are equal.
        String sql1 = "SELECT * FROM table1 WHERE column1 = 1 AND column2 = 2";
        String sql2 = "SELECT * FROM table1 WHERE column2 = 2 AND column1 = 1";
        Assert.equals(SqlParserEqualHelper.equals(sql1, sql2), true);

        // Same select fields in a different order are equal.
        sql1 = "SELECT a,b,c,d FROM table1 WHERE column1 = 1 AND column2 = 2 order by a";
        sql2 = "SELECT d,c,b,a FROM table1 WHERE column2 = 2 AND column1 = 1 order by a";
        Assert.equals(SqlParserEqualHelper.equals(sql1, sql2), true);

        // Same aggregated select items in a different order are equal.
        sql1 = "SELECT a,sum(b),sum(c),sum(d) FROM table1 WHERE column1 = 1 AND column2 = 2 group by a order by a";
        sql2 = "SELECT sum(d),sum(c),sum(b),a FROM table1 WHERE column2 = 2 AND column1 = 1 group by a order by a";
        Assert.equals(SqlParserEqualHelper.equals(sql1, sql2), true);

        // A different select field (a vs f) makes the statements unequal.
        sql1 = "SELECT a,b,c,d FROM table1 WHERE column1 = 1 AND column2 = 2 order by a";
        sql2 = "SELECT d,c,b,f FROM table1 WHERE column2 = 2 AND column1 = 1 order by a";
        Assert.equals(SqlParserEqualHelper.equals(sql1, sql2), false);
    }
}

View File

@@ -18,106 +18,106 @@ class SqlParserSelectHelperTest {
"select 用户名, 访问次数 from 超音数 where 用户名 in ('alice', 'lucy')");
System.out.println(selectStatement);
List<FilterExpression> filterExpression = SqlParserSelectHelper.getFilterExpression(
List<FieldExpression> fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE "
+ "sys_imp_date = '2023-08-08' AND YEAR(publish_date) = 2023 "
+ " AND user_id = 'alice' ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE sys_imp_date = '2023-08-08' "
+ " AND YEAR(publish_date) = 2023 "
+ " AND MONTH(publish_date) = 8"
+ " AND user_id = 'alice' ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE sys_imp_date = '2023-08-08'"
+ " AND YEAR(publish_date) = 2023 "
+ " AND MONTH(publish_date) = 8 AND DAY(publish_date) =20 "
+ " AND user_id = 'alice' ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE sys_imp_date = '2023-08-08' "
+ " AND user_id = 'alice' AND publish_date = '11' ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE sys_imp_date = '2023-08-08' "
+ "AND user_id = 'alice' AND publish_date = '11' ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE sys_imp_date = '2023-08-08' "
+ "AND user_id = 'alice' AND publish_date = '11' ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE "
+ "user_id = 'alice' AND publish_date = '11' ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE "
+ "user_id = 'alice' AND publish_date > 10000 ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, user_id, field_a FROM s2 WHERE "
+ "user_id like '%alice%' AND publish_date > 10000 ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, pv FROM s2 WHERE "
+ "user_id like '%alice%' AND publish_date > 10000 "
+ "group by department having sum(pv) > 2000 ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, pv FROM s2 WHERE "
+ "(user_id like '%alice%' AND publish_date > 10000) and sys_imp_date = '2023-08-08' "
+ "group by department having sum(pv) > 2000 ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, pv FROM s2 WHERE "
+ "(user_id like '%alice%' AND publish_date > 10000) and song_name in "
+ "('七里香','晴天') and sys_imp_date = '2023-08-08' "
+ "group by department having sum(pv) > 2000 ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, pv FROM s2 WHERE "
+ "(user_id like '%alice%' AND publish_date > 10000) and song_name in (1,2) "
+ "and sys_imp_date = '2023-08-08' "
+ "group by department having sum(pv) > 2000 ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression(
fieldExpression = SqlParserSelectHelper.getFilterExpression(
"SELECT department, pv FROM s2 WHERE "
+ "(user_id like '%alice%' AND publish_date > 10000) and 1 in (1) "
+ "and sys_imp_date = '2023-08-08' "
+ "group by department having sum(pv) > 2000 ORDER BY pv DESC LIMIT 1");
System.out.println(filterExpression);
System.out.println(fieldExpression);
filterExpression = SqlParserSelectHelper.getFilterExpression("SELECT sum(销量) / (SELECT sum(销量) FROM 营销月模型 "
fieldExpression = SqlParserSelectHelper.getFilterExpression("SELECT sum(销量) / (SELECT sum(销量) FROM 营销月模型 "
+ "WHERE MONTH(数据日期) = 9) FROM 营销月模型 WHERE 国家中文名 = '肯尼亚' AND MONTH(数据日期) = 9");
System.out.println(filterExpression);
System.out.println(fieldExpression);
}