(improvement)(headless) Remove MetricCheckProcessor in chat and MetricDrillDownChecker in headless (#716)

(improvement)(headless) remove MetricCheckProcessor in chat and MetricDrillDownChecker in headless

---------

Co-authored-by: jolunoluo
This commit is contained in:
LXW
2024-02-04 14:28:24 +08:00
committed by GitHub
parent 4d4922d269
commit 0c4c6d83ef
18 changed files with 400 additions and 417 deletions

View File

@@ -1,220 +0,0 @@
package com.tencent.supersonic.chat.server.processor.parse;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.tencent.supersonic.chat.core.pojo.ChatContext;
import com.tencent.supersonic.chat.core.pojo.QueryContext;
import com.tencent.supersonic.chat.api.pojo.RelatedSchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.chat.core.query.SemanticQuery;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.chat.api.pojo.response.ParseResp;
import com.tencent.supersonic.chat.server.service.SemanticService;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.common.util.jsqlparser.SqlRemoveHelper;
import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
/**
* MetricCheckProcessor verifies whether the dimensions
* involved in the query in metric mode can drill down on the metric.
*/
@Slf4j
public class MetricCheckProcessor implements ParseResultProcessor {
@Override
public void process(ParseResp parseResp, QueryContext queryContext, ChatContext chatContext) {
List<SemanticQuery> semanticQueries = queryContext.getCandidateQueries();
SemanticService semanticService = ContextUtils.getBean(SemanticService.class);
SemanticSchema semanticSchema = semanticService.getSemanticSchema();
for (SemanticQuery semanticQuery : semanticQueries) {
SemanticParseInfo parseInfo = semanticQuery.getParseInfo();
if (!QueryType.METRIC.equals(parseInfo.getQueryType())) {
continue;
}
String correctSqlProcessed = processCorrectSql(parseInfo, semanticSchema);
log.info("correct sql:{}", correctSqlProcessed);
parseInfo.getSqlInfo().setCorrectS2SQL(correctSqlProcessed);
}
semanticQueries.removeIf(semanticQuery -> {
if (!QueryType.METRIC.equals(semanticQuery.getParseInfo().getQueryType())) {
return false;
}
String correctSql = semanticQuery.getParseInfo().getSqlInfo().getCorrectS2SQL();
if (StringUtils.isBlank(correctSql)) {
return false;
}
return !checkHasMetric(correctSql, semanticSchema);
});
}
public String processCorrectSql(SemanticParseInfo parseInfo, SemanticSchema semanticSchema) {
String correctSql = parseInfo.getSqlInfo().getCorrectS2SQL();
List<String> groupByFields = SqlSelectHelper.getGroupByFields(correctSql);
List<String> metricFields = SqlSelectHelper.getAggregateFields(correctSql);
List<String> whereFields = SqlSelectHelper.getWhereFields(correctSql);
List<String> dimensionFields = getDimensionFields(groupByFields, whereFields);
if (CollectionUtils.isEmpty(metricFields) || StringUtils.isBlank(correctSql)) {
return correctSql;
}
Set<String> metricToRemove = Sets.newHashSet();
Set<String> groupByToRemove = Sets.newHashSet();
Set<String> whereFieldsToRemove = Sets.newHashSet();
for (String metricName : metricFields) {
SchemaElement metricElement = semanticSchema.getElementByName(SchemaElementType.METRIC, metricName);
if (metricElement == null) {
metricToRemove.add(metricName);
}
if (!checkNecessaryDimension(metricElement, semanticSchema, dimensionFields)) {
metricToRemove.add(metricName);
}
}
for (String dimensionName : whereFields) {
if (TimeDimensionEnum.containsTimeDimension(dimensionName)) {
continue;
}
if (!checkInModelSchema(dimensionName, SchemaElementType.DIMENSION, semanticSchema)) {
whereFieldsToRemove.add(dimensionName);
}
if (!checkDrillDownDimension(dimensionName, metricFields, semanticSchema)) {
whereFieldsToRemove.add(dimensionName);
}
}
for (String dimensionName : groupByFields) {
if (TimeDimensionEnum.containsTimeDimension(dimensionName)) {
continue;
}
if (!checkInModelSchema(dimensionName, SchemaElementType.DIMENSION, semanticSchema)) {
groupByToRemove.add(dimensionName);
}
if (!checkDrillDownDimension(dimensionName, metricFields, semanticSchema)) {
groupByToRemove.add(dimensionName);
}
}
return removeFieldInSql(correctSql, metricToRemove, groupByToRemove, whereFieldsToRemove);
}
/**
* To check whether the dimension bound to the metric exists,
* eg: metric like UV is calculated in a certain dimension, it cannot be used on other dimensions.
*/
private boolean checkNecessaryDimension(SchemaElement metric, SemanticSchema semanticSchema,
List<String> dimensionFields) {
List<String> necessaryDimensions = getNecessaryDimensionNames(metric, semanticSchema);
if (CollectionUtils.isEmpty(necessaryDimensions)) {
return true;
}
for (String dimension : necessaryDimensions) {
if (!dimensionFields.contains(dimension)) {
return false;
}
}
return true;
}
/**
* To check whether the dimension can drill down the metric,
* eg: some descriptive dimensions are not suitable as drill-down dimensions
*/
private boolean checkDrillDownDimension(String dimensionName, List<String> metrics,
SemanticSchema semanticSchema) {
List<SchemaElement> metricElements = semanticSchema.getMetrics().stream()
.filter(schemaElement -> metrics.contains(schemaElement.getName()))
.collect(Collectors.toList());
if (CollectionUtils.isEmpty(metricElements)) {
return false;
}
List<String> relateDimensions = metricElements.stream()
.filter(schemaElement -> !CollectionUtils.isEmpty(schemaElement.getRelatedSchemaElements()))
.map(schemaElement -> schemaElement.getRelatedSchemaElements().stream()
.map(RelatedSchemaElement::getDimensionId).collect(Collectors.toList()))
.flatMap(Collection::stream)
.map(id -> convertDimensionIdToName(id, semanticSchema))
.filter(Objects::nonNull)
.collect(Collectors.toList());
//if no metric has drill down dimension, return true
if (CollectionUtils.isEmpty(relateDimensions)) {
return true;
}
//if this dimension not in relate drill-down dimensions, return false
return relateDimensions.contains(dimensionName);
}
private List<String> getNecessaryDimensionNames(SchemaElement metric, SemanticSchema semanticSchema) {
List<Long> necessaryDimensionIds = getNecessaryDimensions(metric);
return necessaryDimensionIds.stream().map(id -> convertDimensionIdToName(id, semanticSchema))
.filter(Objects::nonNull).collect(Collectors.toList());
}
private List<Long> getNecessaryDimensions(SchemaElement metric) {
if (metric == null) {
return Lists.newArrayList();
}
List<RelatedSchemaElement> relateSchemaElements = metric.getRelatedSchemaElements();
if (CollectionUtils.isEmpty(relateSchemaElements)) {
return Lists.newArrayList();
}
return relateSchemaElements.stream()
.filter(RelatedSchemaElement::isNecessary).map(RelatedSchemaElement::getDimensionId)
.collect(Collectors.toList());
}
private List<String> getDimensionFields(List<String> groupByFields, List<String> whereFields) {
List<String> dimensionFields = Lists.newArrayList();
if (!CollectionUtils.isEmpty(groupByFields)) {
dimensionFields.addAll(groupByFields);
}
if (!CollectionUtils.isEmpty(whereFields)) {
dimensionFields.addAll(whereFields);
}
return dimensionFields;
}
private String convertDimensionIdToName(Long id, SemanticSchema semanticSchema) {
SchemaElement schemaElement = semanticSchema.getElement(SchemaElementType.DIMENSION, id);
if (schemaElement == null) {
return null;
}
return schemaElement.getName();
}
private boolean checkInModelSchema(String name, SchemaElementType type, SemanticSchema semanticSchema) {
SchemaElement schemaElement = semanticSchema.getElementByName(type, name);
return schemaElement != null;
}
private boolean checkHasMetric(String correctSql, SemanticSchema semanticSchema) {
List<String> selectFields = SqlSelectHelper.getSelectFields(correctSql);
List<String> aggFields = SqlSelectHelper.getAggregateFields(correctSql);
List<String> collect = semanticSchema.getMetrics().stream()
.map(SchemaElement::getName).collect(Collectors.toList());
for (String field : selectFields) {
if (collect.contains(field)) {
return true;
}
}
return !CollectionUtils.isEmpty(aggFields);
}
private static String removeFieldInSql(String sql, Set<String> metricToRemove,
Set<String> dimensionByToRemove, Set<String> whereFieldsToRemove) {
sql = SqlRemoveHelper.removeWhereCondition(sql, whereFieldsToRemove);
sql = SqlRemoveHelper.removeSelect(sql, metricToRemove);
sql = SqlRemoveHelper.removeSelect(sql, dimensionByToRemove);
sql = SqlRemoveHelper.removeGroupBy(sql, dimensionByToRemove);
sql = SqlRemoveHelper.removeNumberFilter(sql);
return sql;
}
}

View File

@@ -82,12 +82,17 @@ public class ChatConfigController {
return semanticInterpreter.getDomainList(user);
}
//Compatible with front-end
/**
 * Handles GET {@code /viewList} without a domain id.
 * Delegates to {@code semanticInterpreter.getViewList} with a {@code null} domain id
 * (presumably meaning "no domain filter" - confirm against the interpreter implementation).
 *
 * @return the views returned by the semantic interpreter
 */
@GetMapping("/viewList")
public List<ViewResp> getViewList() {
//Compatible with front-end
return semanticInterpreter.getViewList(null);
}
/**
 * Handles GET {@code /viewList/{domainId}}.
 * Delegates to {@code semanticInterpreter.getViewList} with the domain id taken from the path.
 *
 * @param domainId domain id bound from the {@code domainId} path variable
 * @return the views returned by the semantic interpreter for that domain id
 */
@GetMapping("/viewList/{domainId}")
public List<ViewResp> getViewList(@PathVariable("domainId") Long domainId) {
return semanticInterpreter.getViewList(domainId);
}
@PostMapping("/dimension/page")
public PageInfo<DimensionResp> getDimension(@RequestBody PageDimensionReq pageDimensionReq) {
return semanticInterpreter.getDimensionPage(pageDimensionReq);

View File

@@ -102,10 +102,10 @@ public class SemanticService {
}
entityInfo.setViewInfo(viewInfo);
TagTypeDefaultConfig tagTypeDefaultConfig = viewSchema.getTagTypeDefaultConfig();
if (tagTypeDefaultConfig == null) {
if (tagTypeDefaultConfig == null || tagTypeDefaultConfig.getDefaultDisplayInfo() == null) {
return entityInfo;
}
List<DataInfo> dimensions = tagTypeDefaultConfig.getDimensionIds().stream()
List<DataInfo> dimensions = tagTypeDefaultConfig.getDefaultDisplayInfo().getDimensionIds().stream()
.map(id -> {
SchemaElement element = viewSchema.getElement(SchemaElementType.DIMENSION, id);
if (element == null) {
@@ -113,7 +113,7 @@ public class SemanticService {
}
return new DataInfo(element.getId().intValue(), element.getName(), element.getBizName(), null);
}).filter(Objects::nonNull).collect(Collectors.toList());
List<DataInfo> metrics = tagTypeDefaultConfig.getDimensionIds().stream()
List<DataInfo> metrics = tagTypeDefaultConfig.getDefaultDisplayInfo().getDimensionIds().stream()
.map(id -> {
SchemaElement element = viewSchema.getElement(SchemaElementType.METRIC, id);
if (element == null) {

View File

@@ -1,162 +0,0 @@
package com.tencent.supersonic.chat.server.processor;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.tencent.supersonic.chat.api.pojo.ViewSchema;
import com.tencent.supersonic.chat.api.pojo.RelatedSchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.chat.server.processor.parse.MetricCheckProcessor;
import java.util.List;
import java.util.Set;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link MetricCheckProcessor#processCorrectSql}, run against small mocked schemas.
 */
class MetricCheckProcessorTest {

    @Test
    void testProcessCorrectSql_necessaryDimension_groupBy() {
        assertProcessedSql(
                "select 用户名, sum(访问次数), count(distinct 访问用户数) from 超音数 group by 用户名",
                mockModelSchema(),
                "SELECT 用户名, sum(访问次数) FROM 超音数 GROUP BY 用户名");
    }

    @Test
    void testProcessCorrectSql_necessaryDimension_where() {
        assertProcessedSql(
                "select 用户名, sum(访问次数), count(distinct 访问用户数) from 超音数 where 部门 = 'HR' group by 用户名",
                mockModelSchema(),
                "SELECT 用户名, sum(访问次数), count(DISTINCT 访问用户数) FROM 超音数 "
                        + "WHERE 部门 = 'HR' GROUP BY 用户名");
    }

    @Test
    void testProcessCorrectSql_dimensionNotDrillDown_groupBy() {
        assertProcessedSql(
                "select 页面, 部门, sum(访问次数), count(distinct 访问用户数) from 超音数 group by 页面, 部门",
                mockModelSchema(),
                "SELECT 部门, sum(访问次数), count(DISTINCT 访问用户数) FROM 超音数 GROUP BY 部门");
    }

    @Test
    void testProcessCorrectSql_dimensionNotDrillDown_where() {
        assertProcessedSql(
                "select 部门, sum(访问次数), count(distinct 访问用户数) from 超音数 where 页面 = 'P1' group by 部门",
                mockModelSchema(),
                "SELECT 部门, sum(访问次数), count(DISTINCT 访问用户数) FROM 超音数 GROUP BY 部门");
    }

    @Test
    void testProcessCorrectSql_dimensionNotDrillDown_necessaryDimension() {
        assertProcessedSql(
                "select 页面, sum(访问次数), count(distinct 访问用户数) from 超音数 group by 页面",
                mockModelSchema(),
                "SELECT sum(访问次数) FROM 超音数");
    }

    @Test
    void testProcessCorrectSql_dimensionDrillDown() {
        assertProcessedSql(
                "select 用户名, 部门, sum(访问次数), count(distinct 访问用户数) from 超音数 group by 用户名, 部门",
                mockModelSchema(),
                "SELECT 用户名, 部门, sum(访问次数), count(DISTINCT 访问用户数) FROM 超音数 GROUP BY 用户名, 部门");
    }

    @Test
    void testProcessCorrectSql_noDrillDownDimensionSetting() {
        assertProcessedSql(
                "select 页面, 用户名, sum(访问次数), count(distinct 访问用户数) from 超音数 group by 页面, 用户名",
                mockModelSchemaNoDimensionSetting(),
                "SELECT 页面, 用户名, sum(访问次数), count(DISTINCT 访问用户数) FROM 超音数 GROUP BY 页面, 用户名");
    }

    @Test
    void testProcessCorrectSql_noDrillDownDimensionSetting_noAgg() {
        assertProcessedSql(
                "select 访问次数 from 超音数",
                mockModelSchemaNoDimensionSetting(),
                "select 访问次数 from 超音数");
    }

    @Test
    void testProcessCorrectSql_noDrillDownDimensionSetting_count() {
        assertProcessedSql(
                "select 部门, count(*) from 超音数 group by 部门",
                mockModelSchemaNoDimensionSetting(),
                "SELECT count(*) FROM 超音数");
    }

    /**
     * Runs a fresh processor on the given corrected SQL and asserts the rewritten SQL.
     */
    private void assertProcessedSql(String correctSql, SemanticSchema schema, String expectedSql) {
        MetricCheckProcessor processor = new MetricCheckProcessor();
        String actualSql = processor.processCorrectSql(mockParseInfo(correctSql), schema);
        Assertions.assertEquals(expectedSql, actualSql);
    }

    /**
     * Schema with drill-down settings:
     * 访问次数 may drill down on 用户名 and 部门 (neither is necessary);
     * 访问用户数 may drill down on 部门 only, and 部门 is necessary, i.e. it must appear
     * in the group-by or where clause of the query.
     */
    private SemanticSchema mockModelSchema() {
        SchemaElement visitCount = mockElement(1L, "访问次数", SchemaElementType.METRIC,
                Lists.newArrayList(relatedDimension(2L, false), relatedDimension(1L, false)));
        SchemaElement visitorCount = mockElement(2L, "访问用户数", SchemaElementType.METRIC,
                Lists.newArrayList(relatedDimension(2L, true)));
        return schemaWithMetrics(Sets.newHashSet(visitCount, visitorCount));
    }

    /**
     * Schema with the same metrics but no drill-down dimension configured on any of them.
     */
    private SemanticSchema mockModelSchemaNoDimensionSetting() {
        SchemaElement visitCount = mockElement(1L, "访问次数", SchemaElementType.METRIC, Lists.newArrayList());
        SchemaElement visitorCount = mockElement(2L, "访问用户数", SchemaElementType.METRIC, Lists.newArrayList());
        return schemaWithMetrics(Sets.newHashSet(visitCount, visitorCount));
    }

    /**
     * Wraps the given metrics and the shared mock dimensions into a single-view semantic schema.
     */
    private SemanticSchema schemaWithMetrics(Set<SchemaElement> metrics) {
        ViewSchema viewSchema = new ViewSchema();
        viewSchema.setMetrics(metrics);
        viewSchema.setDimensions(mockDimensions());
        return new SemanticSchema(Lists.newArrayList(viewSchema));
    }

    /**
     * Builds a metric-to-dimension relation with the given necessity flag.
     */
    private RelatedSchemaElement relatedDimension(Long dimensionId, boolean necessary) {
        return RelatedSchemaElement.builder().dimensionId(dimensionId).isNecessary(necessary).build();
    }

    /**
     * The three dimensions shared by both mock schemas: 用户名 (1), 部门 (2) and 页面 (3).
     */
    private Set<SchemaElement> mockDimensions() {
        SchemaElement userName = mockElement(1L, "用户名", SchemaElementType.DIMENSION, Lists.newArrayList());
        SchemaElement department = mockElement(2L, "部门", SchemaElementType.DIMENSION, Lists.newArrayList());
        SchemaElement page = mockElement(3L, "页面", SchemaElementType.DIMENSION, Lists.newArrayList());
        return Sets.newHashSet(userName, department, page);
    }

    private SchemaElement mockElement(Long id, String name, SchemaElementType type,
            List<RelatedSchemaElement> relateSchemaElements) {
        return SchemaElement.builder()
                .id(id)
                .name(name)
                .type(type)
                .relatedSchemaElements(relateSchemaElements)
                .build();
    }

    /**
     * Creates a parse info whose SQL info carries the given corrected S2SQL.
     */
    private SemanticParseInfo mockParseInfo(String correctSql) {
        SemanticParseInfo parseInfo = new SemanticParseInfo();
        parseInfo.getSqlInfo().setCorrectS2SQL(correctSql);
        return parseInfo;
    }
}