mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 19:51:00 +00:00
[improvement][headless]Move discovery of query models from SemanticNode to QueryParser.
This commit is contained in:
@@ -45,14 +45,6 @@ public class SemanticSchemaResp {
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
public List<MetricSchemaResp> getMetrics(List<String> bizNames) {
|
||||
Map<String, MetricSchemaResp> metricLowerToNameMap = metrics.stream().collect(
|
||||
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
|
||||
return bizNames.stream().map(String::toLowerCase)
|
||||
.filter(entry -> metricLowerToNameMap.containsKey(entry))
|
||||
.map(entry -> metricLowerToNameMap.get(entry)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public DimSchemaResp getDimension(String bizName) {
|
||||
return dimensions.stream()
|
||||
.filter(dimension -> bizName.equalsIgnoreCase(dimension.getBizName())).findFirst()
|
||||
@@ -64,14 +56,6 @@ public class SemanticSchemaResp {
|
||||
.orElse(null);
|
||||
}
|
||||
|
||||
public List<DimSchemaResp> getDimensions(List<String> bizNames) {
|
||||
Map<String, DimSchemaResp> dimLowerToNameMap = dimensions.stream().collect(
|
||||
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
|
||||
return bizNames.stream().map(String::toLowerCase)
|
||||
.filter(entry -> dimLowerToNameMap.containsKey(entry))
|
||||
.map(entry -> dimLowerToNameMap.get(entry)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public Set<String> getNameFromBizNames(Set<String> bizNames) {
|
||||
Set<String> names = new HashSet<>();
|
||||
for (String bizName : bizNames) {
|
||||
|
||||
@@ -5,15 +5,21 @@ import com.tencent.supersonic.common.pojo.ColumnOrder;
|
||||
import com.tencent.supersonic.headless.api.pojo.enums.AggOption;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* An ontology query comprises metrics/dimensions that are relevant to the semantic query. Note that
|
||||
* metrics/dimensions in the ontology query must be a subset of an ontology.
|
||||
*/
|
||||
@Data
|
||||
public class OntologyQuery {
|
||||
|
||||
private Set<ModelResp> models = Sets.newHashSet();
|
||||
private Set<MetricSchemaResp> metrics = Sets.newHashSet();
|
||||
private Set<DimSchemaResp> dimensions = Sets.newHashSet();
|
||||
private Set<String> fields = Sets.newHashSet();
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package com.tencent.supersonic.headless.core.translator.parser;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.tencent.supersonic.common.jsqlparser.SqlReplaceHelper;
|
||||
import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper;
|
||||
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
|
||||
@@ -9,7 +11,9 @@ import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.headless.api.pojo.enums.AggOption;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.SemanticSchemaResp;
|
||||
import com.tencent.supersonic.headless.core.pojo.Ontology;
|
||||
import com.tencent.supersonic.headless.core.pojo.OntologyQuery;
|
||||
import com.tencent.supersonic.headless.core.pojo.QueryStatement;
|
||||
import com.tencent.supersonic.headless.core.pojo.SqlQuery;
|
||||
@@ -18,9 +22,8 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* This parser rewrites S2SQL including conversion from metric/dimension name to bizName and build
|
||||
@@ -57,16 +60,23 @@ public class SqlQueryParser implements QueryParser {
|
||||
}
|
||||
|
||||
// build ontologyQuery
|
||||
Ontology ontology = queryStatement.getOntology();
|
||||
List<String> allQueryFields = SqlSelectHelper.getAllSelectFields(sqlQuery.getSql());
|
||||
List<MetricSchemaResp> queryMetrics = semanticSchema.getMetrics(allQueryFields);
|
||||
List<DimSchemaResp> queryDimensions = semanticSchema.getDimensions(allQueryFields);
|
||||
OntologyQuery ontologyQuery = new OntologyQuery();
|
||||
queryStatement.setOntologyQuery(ontologyQuery);
|
||||
|
||||
List<MetricSchemaResp> queryMetrics = findQueryMetrics(ontology, allQueryFields);
|
||||
ontologyQuery.getMetrics().addAll(queryMetrics);
|
||||
|
||||
List<DimSchemaResp> queryDimensions = findQueryDimensions(ontology, allQueryFields);
|
||||
ontologyQuery.getDimensions().addAll(queryDimensions);
|
||||
|
||||
List<ModelResp> queryModels = findQueryModels(ontology, queryMetrics, queryDimensions);
|
||||
ontologyQuery.getModels().addAll(queryModels);
|
||||
|
||||
AggOption sqlQueryAggOption = getAggOption(sqlQuery.getSql(), queryMetrics);
|
||||
ontologyQuery.setAggOption(sqlQueryAggOption);
|
||||
queryStatement.setOntologyQuery(ontologyQuery);
|
||||
|
||||
log.info("parse sqlQuery [{}] ", sqlQuery);
|
||||
}
|
||||
|
||||
@@ -126,4 +136,57 @@ public class SqlQueryParser implements QueryParser {
|
||||
queryStatement.getSqlQuery().setSql(newSql);
|
||||
}
|
||||
|
||||
public List<MetricSchemaResp> findQueryMetrics(Ontology ontology, List<String> bizNames) {
|
||||
Map<String, MetricSchemaResp> metricLowerToNameMap = ontology.getMetrics().stream().collect(
|
||||
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
|
||||
return bizNames.stream().map(String::toLowerCase)
|
||||
.filter(entry -> metricLowerToNameMap.containsKey(entry))
|
||||
.map(entry -> metricLowerToNameMap.get(entry)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public List<DimSchemaResp> findQueryDimensions(Ontology ontology, List<String> bizNames) {
|
||||
Map<String, DimSchemaResp> dimLowerToNameMap = ontology.getDimensions().stream().collect(
|
||||
Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry));
|
||||
return bizNames.stream().map(String::toLowerCase)
|
||||
.filter(entry -> dimLowerToNameMap.containsKey(entry))
|
||||
.map(entry -> dimLowerToNameMap.get(entry)).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public List<ModelResp> findQueryModels(Ontology ontology, List<MetricSchemaResp> queryMetrics,
|
||||
List<DimSchemaResp> queryDimensions) {
|
||||
// first, sort models based on the number of query metrics
|
||||
Map<String, Integer> modelMetricCount = Maps.newHashMap();
|
||||
queryMetrics.forEach(m -> {
|
||||
if (!modelMetricCount.containsKey(m.getModelBizName())) {
|
||||
modelMetricCount.put(m.getModelBizName(), 1);
|
||||
} else {
|
||||
int count = modelMetricCount.get(m.getModelBizName());
|
||||
modelMetricCount.put(m.getModelBizName(), count + 1);
|
||||
}
|
||||
});
|
||||
List<String> metricsDataModels = modelMetricCount.entrySet().stream()
|
||||
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// second, sort models based on the number of query dimensions
|
||||
Map<String, Integer> modelDimCount = Maps.newHashMap();
|
||||
queryDimensions.forEach(m -> {
|
||||
if (!modelDimCount.containsKey(m.getModelBizName())) {
|
||||
modelDimCount.put(m.getModelBizName(), 1);
|
||||
} else {
|
||||
int count = modelDimCount.get(m.getModelBizName());
|
||||
modelDimCount.put(m.getModelBizName(), count + 1);
|
||||
}
|
||||
});
|
||||
List<String> dimDataModels = modelDimCount.entrySet().stream()
|
||||
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Set<String> dataModelNames = Sets.newLinkedHashSet();
|
||||
dataModelNames.addAll(dimDataModels);
|
||||
dataModelNames.addAll(metricsDataModels);
|
||||
return dataModelNames.stream().map(bizName -> ontology.getModelMap().get(bizName))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ package com.tencent.supersonic.headless.core.translator.parser.calcite;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.tencent.supersonic.common.calcite.Configuration;
|
||||
import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
|
||||
import com.tencent.supersonic.common.pojo.enums.EngineType;
|
||||
@@ -127,56 +126,6 @@ public class DataModelNode extends SemanticNode {
|
||||
return sqlNode;
|
||||
}
|
||||
|
||||
public static void getQueryDimensionMeasure(Ontology ontology, OntologyQuery ontologyQuery,
|
||||
Set<String> queryDimensions, Set<String> queryMeasures) {
|
||||
ontologyQuery.getMetrics().forEach(m -> {
|
||||
if (Objects.nonNull(m.getMetricDefineByMeasureParams())) {
|
||||
m.getMetricDefineByMeasureParams().getMeasures()
|
||||
.forEach(mm -> queryMeasures.add(mm.getName()));
|
||||
}
|
||||
if (Objects.nonNull(m.getMetricDefineByFieldParams())) {
|
||||
m.getMetricDefineByFieldParams().getFields()
|
||||
.forEach(mm -> queryMeasures.add(mm.getFieldName()));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
public static List<ModelResp> getQueryDataModelsV2(Ontology ontology, OntologyQuery query) {
|
||||
// first, sort models based on the number of query metrics
|
||||
Map<String, Integer> modelMetricCount = Maps.newHashMap();
|
||||
query.getMetrics().forEach(m -> {
|
||||
if (!modelMetricCount.containsKey(m.getModelBizName())) {
|
||||
modelMetricCount.put(m.getModelBizName(), 1);
|
||||
} else {
|
||||
int count = modelMetricCount.get(m.getModelBizName());
|
||||
modelMetricCount.put(m.getModelBizName(), count + 1);
|
||||
}
|
||||
});
|
||||
List<String> metricsDataModels = modelMetricCount.entrySet().stream()
|
||||
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// first, sort models based on the number of query dimensions
|
||||
Map<String, Integer> modelDimCount = Maps.newHashMap();
|
||||
query.getDimensions().forEach(m -> {
|
||||
if (!modelDimCount.containsKey(m.getModelBizName())) {
|
||||
modelDimCount.put(m.getModelBizName(), 1);
|
||||
} else {
|
||||
int count = modelDimCount.get(m.getModelBizName());
|
||||
modelDimCount.put(m.getModelBizName(), count + 1);
|
||||
}
|
||||
});
|
||||
List<String> dimDataModels = modelDimCount.entrySet().stream()
|
||||
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Set<String> dataModelNames = Sets.newLinkedHashSet();
|
||||
dataModelNames.addAll(dimDataModels);
|
||||
dataModelNames.addAll(metricsDataModels);
|
||||
return dataModelNames.stream().map(bizName -> ontology.getModelMap().get(bizName))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static List<ModelResp> getQueryDataModels(Ontology ontology,
|
||||
OntologyQuery ontologyQuery) {
|
||||
// get query measures and dimensions
|
||||
@@ -212,6 +161,20 @@ public class DataModelNode extends SemanticNode {
|
||||
return relatedDataModels;
|
||||
}
|
||||
|
||||
public static void getQueryDimensionMeasure(Ontology ontology, OntologyQuery ontologyQuery,
|
||||
Set<String> queryDimensions, Set<String> queryMeasures) {
|
||||
ontologyQuery.getMetrics().forEach(m -> {
|
||||
if (Objects.nonNull(m.getMetricDefineByMeasureParams())) {
|
||||
m.getMetricDefineByMeasureParams().getMeasures()
|
||||
.forEach(mm -> queryMeasures.add(mm.getName()));
|
||||
}
|
||||
if (Objects.nonNull(m.getMetricDefineByFieldParams())) {
|
||||
m.getMetricDefineByFieldParams().getFields()
|
||||
.forEach(mm -> queryMeasures.add(mm.getFieldName()));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private static ModelResp findBaseModel(Ontology ontology, OntologyQuery query) {
|
||||
ModelResp dataModel = null;
|
||||
// first, try to find the model with the most query metrics
|
||||
|
||||
@@ -41,14 +41,12 @@ public class SqlBuilder {
|
||||
ontologyQuery.setLimit(0L);
|
||||
}
|
||||
|
||||
// find relevant data models
|
||||
List<ModelResp> dataModels =
|
||||
DataModelNode.getQueryDataModelsV2(schema.getOntology(), ontologyQuery);
|
||||
Set<ModelResp> dataModels = ontologyQuery.getModels();
|
||||
if (dataModels == null || dataModels.isEmpty()) {
|
||||
throw new Exception("data model not found");
|
||||
}
|
||||
|
||||
TableView tableView = render(ontologyQuery, dataModels, scope, schema);
|
||||
TableView tableView = render(ontologyQuery, new ArrayList<>(dataModels), scope, schema);
|
||||
SqlNode parserNode = tableView.build();
|
||||
DatabaseResp database = queryStatement.getOntology().getDatabase();
|
||||
EngineType engineType = EngineType.fromString(database.getType());
|
||||
|
||||
Reference in New Issue
Block a user