(improvement)(chat) Forcefully delete the data date generated by the large model. (#1607)

This commit is contained in:
lexluo09
2024-08-27 12:36:39 +08:00
committed by GitHub
parent 5606633481
commit b9ae0a4c92
6 changed files with 113 additions and 54 deletions

View File

@@ -1,8 +1,10 @@
package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.jsqlparser.SqlAddHelper;
import com.tencent.supersonic.common.jsqlparser.SqlRemoveHelper;
import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.headless.api.pojo.DataSetSchema;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
@@ -61,15 +63,13 @@ public abstract class BaseSemanticCorrector implements SemanticCorrector {
return elements.stream();
})
.collect(Collectors.toMap(a -> a, a -> a, (k1, k2) -> k1));
if (chatQueryContext.containsPartitionDimensions(dataSetId)) {
result.put(TimeDimensionEnum.DAY.getChName(), TimeDimensionEnum.DAY.getChName());
result.put(TimeDimensionEnum.MONTH.getChName(), TimeDimensionEnum.MONTH.getChName());
result.put(TimeDimensionEnum.WEEK.getChName(), TimeDimensionEnum.WEEK.getChName());
result.put(TimeDimensionEnum.DAY.getChName(), TimeDimensionEnum.DAY.getChName());
result.put(TimeDimensionEnum.MONTH.getChName(), TimeDimensionEnum.MONTH.getChName());
result.put(TimeDimensionEnum.WEEK.getChName(), TimeDimensionEnum.WEEK.getChName());
result.put(TimeDimensionEnum.DAY.getName(), TimeDimensionEnum.DAY.getChName());
result.put(TimeDimensionEnum.MONTH.getName(), TimeDimensionEnum.MONTH.getChName());
result.put(TimeDimensionEnum.WEEK.getName(), TimeDimensionEnum.WEEK.getChName());
}
result.put(TimeDimensionEnum.DAY.getName(), TimeDimensionEnum.DAY.getChName());
result.put(TimeDimensionEnum.MONTH.getName(), TimeDimensionEnum.MONTH.getChName());
result.put(TimeDimensionEnum.WEEK.getName(), TimeDimensionEnum.WEEK.getChName());
return result;
}
@@ -122,4 +122,24 @@ public abstract class BaseSemanticCorrector implements SemanticCorrector {
dimensions.add(TimeDimensionEnum.DAY.getChName());
return dimensions;
}
/**
 * Reports whether the data set referenced by the parse info declares partition dimensions.
 *
 * <p>NOTE(review): the schema looked up by data set id is dereferenced without a null check;
 * confirm that every data set id reaching this method is present in the schema map.
 *
 * @param chatQueryContext query context that supplies the semantic schema
 * @param semanticParseInfo parse result whose data set id selects the schema entry
 * @return the result of {@code containsPartitionDimensions()} on the matching data set schema
 */
protected boolean containsPartitionDimensions(ChatQueryContext chatQueryContext,
        SemanticParseInfo semanticParseInfo) {
    final Long targetDataSetId = semanticParseInfo.getDataSetId();
    final DataSetSchema targetSchema = chatQueryContext.getSemanticSchema()
            .getDataSetSchemaMap()
            .get(targetDataSetId);
    return targetSchema.containsPartitionDimensions();
}
/**
 * Strips the day/week/month time-dimension fields from the corrected S2SQL.
 *
 * <p>The fields are removed from the WHERE conditions, the SELECT list and the GROUP BY
 * clause, in that order, and the resulting statement is written back to the SQL info.
 *
 * @param semanticParseInfo parse result whose corrected S2SQL is rewritten in place
 */
protected void removeDateIfExist(SemanticParseInfo semanticParseInfo) {
    // Collect the display names of every time granularity that has to be dropped.
    TimeDimensionEnum[] granularities = {
        TimeDimensionEnum.DAY, TimeDimensionEnum.WEEK, TimeDimensionEnum.MONTH
    };
    Set<String> dateFieldNames = new HashSet<>();
    for (TimeDimensionEnum granularity : granularities) {
        dateFieldNames.add(granularity.getChName());
    }

    String originalSql = semanticParseInfo.getSqlInfo().getCorrectedS2SQL();
    String withoutWhere = SqlRemoveHelper.removeWhereCondition(originalSql, dateFieldNames);
    String withoutSelect = SqlRemoveHelper.removeSelect(withoutWhere, dateFieldNames);
    String withoutGroupBy = SqlRemoveHelper.removeGroupBy(withoutSelect, dateFieldNames);
    semanticParseInfo.getSqlInfo().setCorrectedS2SQL(withoutGroupBy);
}
}

View File

@@ -36,6 +36,8 @@ public class SchemaCorrector extends BaseSemanticCorrector {
@Override
public void doCorrect(ChatQueryContext chatQueryContext, SemanticParseInfo semanticParseInfo) {
removeDateFields(chatQueryContext, semanticParseInfo);
correctAggFunction(semanticParseInfo);
replaceAlias(semanticParseInfo);
@@ -47,6 +49,13 @@ public class SchemaCorrector extends BaseSemanticCorrector {
correctFieldName(chatQueryContext, semanticParseInfo);
}
/**
 * Removes the date fields from the corrected S2SQL unless the data set declares
 * partition dimensions, in which case the SQL is left untouched.
 *
 * @param chatQueryContext query context used to look up the data set schema
 * @param semanticParseInfo parse result whose corrected S2SQL may be rewritten
 */
private void removeDateFields(ChatQueryContext chatQueryContext, SemanticParseInfo semanticParseInfo) {
    boolean partitioned = containsPartitionDimensions(chatQueryContext, semanticParseInfo);
    if (!partitioned) {
        removeDateIfExist(semanticParseInfo);
    }
}
private void correctAggFunction(SemanticParseInfo semanticParseInfo) {
Map<String, String> aggregateEnum = AggregateEnum.getAggregateEnum();
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();

View File

@@ -4,12 +4,10 @@ package com.tencent.supersonic.headless.chat.corrector;
import com.tencent.supersonic.common.jsqlparser.DateVisitor.DateBoundInfo;
import com.tencent.supersonic.common.jsqlparser.SqlAddHelper;
import com.tencent.supersonic.common.jsqlparser.SqlDateSelectHelper;
import com.tencent.supersonic.common.jsqlparser.SqlRemoveHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.headless.api.pojo.DataSetSchema;
import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.chat.ChatQueryContext;
import lombok.extern.slf4j.Slf4j;
import net.sf.jsqlparser.JSQLParserException;
@@ -19,10 +17,8 @@ import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.util.CollectionUtils;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
/**
* Perform SQL corrections on the time in S2SQL.
@@ -40,17 +36,6 @@ public class TimeCorrector extends BaseSemanticCorrector {
addLowerBoundDate(semanticParseInfo);
}
/**
 * Strips the day/week/month time-dimension fields from the WHERE conditions and the
 * GROUP BY clause of the corrected S2SQL, then stores the rewritten statement.
 *
 * @param semanticParseInfo parse result whose corrected S2SQL is rewritten in place
 */
private void removeDateIfExist(SemanticParseInfo semanticParseInfo) {
    Set<String> dateFieldNames = new HashSet<>();
    dateFieldNames.add(TimeDimensionEnum.DAY.getChName());
    dateFieldNames.add(TimeDimensionEnum.WEEK.getChName());
    dateFieldNames.add(TimeDimensionEnum.MONTH.getChName());

    String originalSql = semanticParseInfo.getSqlInfo().getCorrectedS2SQL();
    String withoutWhere = SqlRemoveHelper.removeWhereCondition(originalSql, dateFieldNames);
    String withoutGroupBy = SqlRemoveHelper.removeGroupBy(withoutWhere, dateFieldNames);
    semanticParseInfo.getSqlInfo().setCorrectedS2SQL(withoutGroupBy);
}
private void addDateIfNotExist(ChatQueryContext chatQueryContext, SemanticParseInfo semanticParseInfo) {
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectedS2SQL();
List<String> whereFields = SqlSelectHelper.getWhereFields(correctS2SQL);
@@ -80,14 +65,6 @@ public class TimeCorrector extends BaseSemanticCorrector {
semanticParseInfo.getSqlInfo().setCorrectedS2SQL(correctS2SQL);
}
/**
 * Reports whether the data set referenced by the parse info declares partition dimensions.
 *
 * @param chatQueryContext query context that supplies the semantic schema
 * @param semanticParseInfo parse result whose data set id selects the schema entry
 * @return the result of {@code containsPartitionDimensions()} on the matching data set schema
 */
private boolean containsPartitionDimensions(ChatQueryContext chatQueryContext,
        SemanticParseInfo semanticParseInfo) {
    Long lookupId = semanticParseInfo.getDataSetId();
    SemanticSchema schema = chatQueryContext.getSemanticSchema();
    return schema.getDataSetSchemaMap().get(lookupId).containsPartitionDimensions();
}
private void addLowerBoundDate(SemanticParseInfo semanticParseInfo) {
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectedS2SQL();
DateBoundInfo dateBoundInfo = SqlDateSelectHelper.getDateBoundInfo(correctS2SQL);