[improvement][headless] Move discovery of query models from SemanticNode to QueryParser.

jerryjzhang
2024-12-22 17:45:32 +08:00
parent 10a510409f
commit d8b8c4e6b9
5 changed files with 91 additions and 77 deletions
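In short: the metric/dimension lookup helpers move out of SemanticSchemaResp, model ranking moves out of DataModelNode (a SemanticNode), and both are now performed by SqlQueryParser, which records the chosen models on the new OntologyQuery.models field that SqlBuilder reads. A rough sketch of the resulting parse flow, using the names from the diff below (illustrative only, not the exact committed code; queryStatement, sqlQuery and ontology are assumed to be in scope as in the real parse method):

// inside SqlQueryParser#parse(QueryStatement) -- sketch
OntologyQuery ontologyQuery = new OntologyQuery();
queryStatement.setOntologyQuery(ontologyQuery);

List<String> allQueryFields = SqlSelectHelper.getAllSelectFields(sqlQuery.getSql());
List<MetricSchemaResp> queryMetrics = findQueryMetrics(ontology, allQueryFields);
List<DimSchemaResp> queryDimensions = findQueryDimensions(ontology, allQueryFields);
ontologyQuery.getMetrics().addAll(queryMetrics);
ontologyQuery.getDimensions().addAll(queryDimensions);

// model discovery now happens here instead of in DataModelNode.getQueryDataModelsV2
ontologyQuery.getModels().addAll(findQueryModels(ontology, queryMetrics, queryDimensions));
ontologyQuery.setAggOption(getAggOption(sqlQuery.getSql(), queryMetrics));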

View File

@@ -45,14 +45,6 @@ public class SemanticSchemaResp {
.orElse(null);
}
public List<MetricSchemaResp> getMetrics(List<String> bizNames) {
Map<String, MetricSchemaResp> metricLowerToNameMap = metrics.stream().collect(
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
return bizNames.stream().map(String::toLowerCase)
.filter(entry -> metricLowerToNameMap.containsKey(entry))
.map(entry -> metricLowerToNameMap.get(entry)).collect(Collectors.toList());
}
public DimSchemaResp getDimension(String bizName) {
return dimensions.stream()
.filter(dimension -> bizName.equalsIgnoreCase(dimension.getBizName())).findFirst()
@@ -64,14 +56,6 @@ public class SemanticSchemaResp {
.orElse(null);
}
public List<DimSchemaResp> getDimensions(List<String> bizNames) {
Map<String, DimSchemaResp> dimLowerToNameMap = dimensions.stream().collect(
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
return bizNames.stream().map(String::toLowerCase)
.filter(entry -> dimLowerToNameMap.containsKey(entry))
.map(entry -> dimLowerToNameMap.get(entry)).collect(Collectors.toList());
}
public Set<String> getNameFromBizNames(Set<String> bizNames) {
Set<String> names = new HashSet<>();
for (String bizName : bizNames) {

View File

@@ -5,15 +5,21 @@ import com.tencent.supersonic.common.pojo.ColumnOrder;
import com.tencent.supersonic.headless.api.pojo.enums.AggOption;
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
import lombok.Data;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * An ontology query comprises the metrics/dimensions relevant to the semantic query. Note that
 * the metrics/dimensions in an ontology query must be a subset of those defined in the ontology.
 */
@Data
public class OntologyQuery {
private Set<ModelResp> models = Sets.newHashSet();
private Set<MetricSchemaResp> metrics = Sets.newHashSet();
private Set<DimSchemaResp> dimensions = Sets.newHashSet();
private Set<String> fields = Sets.newHashSet();
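As the class comment says, only fields that actually resolve against the ontology end up in the query. The lookup in findQueryMetrics/findQueryDimensions (added later in this commit) is a case-insensitive match on bizName that silently drops unknown fields; a self-contained rendering of that matching rule with plain strings (the names here are made up):

import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;

public class BizNameLookupDemo {
    public static void main(String[] args) {
        // pretend these are the bizNames defined in the ontology
        List<String> ontologyBizNames = List.of("pv", "uv", "user_name");

        Map<String, String> lowerToBizName = ontologyBizNames.stream()
                .collect(Collectors.toMap(String::toLowerCase, Function.identity()));

        // fields referenced by the S2SQL; "unknown_col" is not in the ontology
        List<String> queryFields = List.of("PV", "user_name", "unknown_col");

        List<String> resolved = queryFields.stream().map(String::toLowerCase)
                .filter(lowerToBizName::containsKey)
                .map(lowerToBizName::get)
                .collect(Collectors.toList());

        System.out.println(resolved); // [pv, user_name] -- unknown_col is dropped
    }
}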

View File

@@ -1,5 +1,7 @@
package com.tencent.supersonic.headless.core.translator.parser;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.tencent.supersonic.common.jsqlparser.SqlReplaceHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
@@ -9,7 +11,9 @@ import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.headless.api.pojo.enums.AggOption;
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
import com.tencent.supersonic.headless.api.pojo.response.SemanticSchemaResp;
import com.tencent.supersonic.headless.core.pojo.Ontology;
import com.tencent.supersonic.headless.core.pojo.OntologyQuery;
import com.tencent.supersonic.headless.core.pojo.QueryStatement;
import com.tencent.supersonic.headless.core.pojo.SqlQuery;
@@ -18,9 +22,8 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.*;
import java.util.stream.Collectors;
/**
* This parser rewrites S2SQL including conversion from metric/dimension name to bizName and build
@@ -57,16 +60,23 @@ public class SqlQueryParser implements QueryParser {
}
// build ontologyQuery
Ontology ontology = queryStatement.getOntology();
List<String> allQueryFields = SqlSelectHelper.getAllSelectFields(sqlQuery.getSql());
List<MetricSchemaResp> queryMetrics = semanticSchema.getMetrics(allQueryFields);
List<DimSchemaResp> queryDimensions = semanticSchema.getDimensions(allQueryFields);
OntologyQuery ontologyQuery = new OntologyQuery();
queryStatement.setOntologyQuery(ontologyQuery);
List<MetricSchemaResp> queryMetrics = findQueryMetrics(ontology, allQueryFields);
ontologyQuery.getMetrics().addAll(queryMetrics);
List<DimSchemaResp> queryDimensions = findQueryDimensions(ontology, allQueryFields);
ontologyQuery.getDimensions().addAll(queryDimensions);
List<ModelResp> queryModels = findQueryModels(ontology, queryMetrics, queryDimensions);
ontologyQuery.getModels().addAll(queryModels);
AggOption sqlQueryAggOption = getAggOption(sqlQuery.getSql(), queryMetrics);
ontologyQuery.setAggOption(sqlQueryAggOption);
queryStatement.setOntologyQuery(ontologyQuery);
log.info("parse sqlQuery [{}] ", sqlQuery);
}
@@ -126,4 +136,57 @@ public class SqlQueryParser implements QueryParser {
queryStatement.getSqlQuery().setSql(newSql);
}
public List<MetricSchemaResp> findQueryMetrics(Ontology ontology, List<String> bizNames) {
Map<String, MetricSchemaResp> metricLowerToNameMap = ontology.getMetrics().stream().collect(
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
return bizNames.stream().map(String::toLowerCase)
.filter(entry -> metricLowerToNameMap.containsKey(entry))
.map(entry -> metricLowerToNameMap.get(entry)).collect(Collectors.toList());
}
public List<DimSchemaResp> findQueryDimensions(Ontology ontology, List<String> bizNames) {
Map<String, DimSchemaResp> dimLowerToNameMap = ontology.getDimensions().stream().collect(
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
return bizNames.stream().map(String::toLowerCase)
.filter(entry -> dimLowerToNameMap.containsKey(entry))
.map(entry -> dimLowerToNameMap.get(entry)).collect(Collectors.toList());
}
public List<ModelResp> findQueryModels(Ontology ontology, List<MetricSchemaResp> queryMetrics,
List<DimSchemaResp> queryDimensions) {
// first, sort models based on the number of query metrics
Map<String, Integer> modelMetricCount = Maps.newHashMap();
queryMetrics.forEach(m -> {
if (!modelMetricCount.containsKey(m.getModelBizName())) {
modelMetricCount.put(m.getModelBizName(), 1);
} else {
int count = modelMetricCount.get(m.getModelBizName());
modelMetricCount.put(m.getModelBizName(), count + 1);
}
});
List<String> metricsDataModels = modelMetricCount.entrySet().stream()
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
.collect(Collectors.toList());
// second, sort models based on the number of query dimensions
Map<String, Integer> modelDimCount = Maps.newHashMap();
queryDimensions.forEach(m -> {
if (!modelDimCount.containsKey(m.getModelBizName())) {
modelDimCount.put(m.getModelBizName(), 1);
} else {
int count = modelDimCount.get(m.getModelBizName());
modelDimCount.put(m.getModelBizName(), count + 1);
}
});
List<String> dimDataModels = modelDimCount.entrySet().stream()
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
.collect(Collectors.toList());
Set<String> dataModelNames = Sets.newLinkedHashSet();
dataModelNames.addAll(dimDataModels);
dataModelNames.addAll(metricsDataModels);
return dataModelNames.stream().map(bizName -> ontology.getModelMap().get(bizName))
.collect(Collectors.toList());
}
}
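findQueryModels ranks candidate models by how many of the queried dimensions and metrics each one owns; dimension-ranked models are added first, then metric-ranked ones, into a LinkedHashSet so order is preserved and duplicates collapse. A standalone illustration of that ordering with made-up model and field names (simplified from the method above, not the project's code):

import java.util.*;
import java.util.stream.Collectors;

public class ModelRankingDemo {

    // count how many of the given fields belong to each model, then sort models
    // by that count in descending order (mirrors the counting in findQueryModels)
    static List<String> rankModels(Map<String, String> fieldToModel, List<String> fields) {
        Map<String, Integer> countByModel = new HashMap<>();
        fields.forEach(f -> countByModel.merge(fieldToModel.get(f), 1, Integer::sum));
        return countByModel.entrySet().stream()
                .sorted(Map.Entry.comparingByValue(Comparator.reverseOrder()))
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        // hypothetical ownership of metrics and dimensions by data models
        Map<String, String> metricToModel =
                Map.of("pv", "visit_model", "uv", "visit_model", "order_cnt", "order_model");
        Map<String, String> dimToModel = Map.of("city", "user_model", "age", "user_model");

        List<String> metricRanked = rankModels(metricToModel, List.of("pv", "uv", "order_cnt"));
        List<String> dimRanked = rankModels(dimToModel, List.of("city", "age"));

        // dimension-ranked models first, then metric-ranked ones, deduplicated in order
        Set<String> queryModels = new LinkedHashSet<>();
        queryModels.addAll(dimRanked);
        queryModels.addAll(metricRanked);
        System.out.println(queryModels); // [user_model, visit_model, order_model]
    }
}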

View File

@@ -2,7 +2,6 @@ package com.tencent.supersonic.headless.core.translator.parser.calcite;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.tencent.supersonic.common.calcite.Configuration;
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.common.pojo.enums.EngineType;
@@ -127,56 +126,6 @@ public class DataModelNode extends SemanticNode {
return sqlNode;
}
public static void getQueryDimensionMeasure(Ontology ontology, OntologyQuery ontologyQuery,
Set<String> queryDimensions, Set<String> queryMeasures) {
ontologyQuery.getMetrics().forEach(m -> {
if (Objects.nonNull(m.getMetricDefineByMeasureParams())) {
m.getMetricDefineByMeasureParams().getMeasures()
.forEach(mm -> queryMeasures.add(mm.getName()));
}
if (Objects.nonNull(m.getMetricDefineByFieldParams())) {
m.getMetricDefineByFieldParams().getFields()
.forEach(mm -> queryMeasures.add(mm.getFieldName()));
}
});
}
public static List<ModelResp> getQueryDataModelsV2(Ontology ontology, OntologyQuery query) {
// first, sort models based on the number of query metrics
Map<String, Integer> modelMetricCount = Maps.newHashMap();
query.getMetrics().forEach(m -> {
if (!modelMetricCount.containsKey(m.getModelBizName())) {
modelMetricCount.put(m.getModelBizName(), 1);
} else {
int count = modelMetricCount.get(m.getModelBizName());
modelMetricCount.put(m.getModelBizName(), count + 1);
}
});
List<String> metricsDataModels = modelMetricCount.entrySet().stream()
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
.collect(Collectors.toList());
// second, sort models based on the number of query dimensions
Map<String, Integer> modelDimCount = Maps.newHashMap();
query.getDimensions().forEach(m -> {
if (!modelDimCount.containsKey(m.getModelBizName())) {
modelDimCount.put(m.getModelBizName(), 1);
} else {
int count = modelDimCount.get(m.getModelBizName());
modelDimCount.put(m.getModelBizName(), count + 1);
}
});
List<String> dimDataModels = modelDimCount.entrySet().stream()
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
.collect(Collectors.toList());
Set<String> dataModelNames = Sets.newLinkedHashSet();
dataModelNames.addAll(dimDataModels);
dataModelNames.addAll(metricsDataModels);
return dataModelNames.stream().map(bizName -> ontology.getModelMap().get(bizName))
.collect(Collectors.toList());
}
public static List<ModelResp> getQueryDataModels(Ontology ontology,
OntologyQuery ontologyQuery) {
// get query measures and dimensions
@@ -212,6 +161,20 @@ public class DataModelNode extends SemanticNode {
return relatedDataModels;
}
public static void getQueryDimensionMeasure(Ontology ontology, OntologyQuery ontologyQuery,
Set<String> queryDimensions, Set<String> queryMeasures) {
ontologyQuery.getMetrics().forEach(m -> {
if (Objects.nonNull(m.getMetricDefineByMeasureParams())) {
m.getMetricDefineByMeasureParams().getMeasures()
.forEach(mm -> queryMeasures.add(mm.getName()));
}
if (Objects.nonNull(m.getMetricDefineByFieldParams())) {
m.getMetricDefineByFieldParams().getFields()
.forEach(mm -> queryMeasures.add(mm.getFieldName()));
}
});
}
private static ModelResp findBaseModel(Ontology ontology, OntologyQuery query) {
ModelResp dataModel = null;
// first, try to find the model with the most query metrics

View File

@@ -41,14 +41,12 @@ public class SqlBuilder {
ontologyQuery.setLimit(0L);
}
// find relevant data models
List<ModelResp> dataModels =
DataModelNode.getQueryDataModelsV2(schema.getOntology(), ontologyQuery);
Set<ModelResp> dataModels = ontologyQuery.getModels();
if (dataModels == null || dataModels.isEmpty()) {
throw new Exception("data model not found");
}
TableView tableView = render(ontologyQuery, dataModels, scope, schema);
TableView tableView = render(ontologyQuery, new ArrayList<>(dataModels), scope, schema);
SqlNode parserNode = tableView.build();
DatabaseResp database = queryStatement.getOntology().getDatabase();
EngineType engineType = EngineType.fromString(database.getType());