[improvement][headless] Move discovery of query models from SemanticNode to SqlQueryParser.

[improvement][headless] Move discovery of query models from SemanticNode to `SqlQueryParser`.
This commit is contained in:
jerryjzhang
2024-12-22 20:29:51 +08:00
parent d8b8c4e6b9
commit 214d90772d
10 changed files with 180 additions and 182 deletions

View File

@@ -3,20 +3,14 @@ package com.tencent.supersonic.headless.api.pojo.response;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.pojo.ModelRela;
import com.tencent.supersonic.common.pojo.enums.QueryType;
import com.tencent.supersonic.headless.api.pojo.SchemaItem;
import com.tencent.supersonic.headless.api.pojo.enums.SchemaType;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Data
@AllArgsConstructor
@@ -72,31 +66,4 @@ public class SemanticSchemaResp {
return names;
}
/**
 * Builds a lookup from every known name and alias of the schema's dimensions
 * and metrics to the corresponding bizName. When a name is claimed by both a
 * metric and a dimension, the metric's bizName wins.
 */
public Map<String, String> getNameToBizNameMap() {
    // support fieldName and field alias to bizName
    Stream<Pair<String, String>> metricPairs = metrics.stream()
            .flatMap(m -> getPairStream(m.getAlias(), m.getName(), m.getBizName()));
    Stream<Pair<String, String>> dimensionPairs = dimensions.stream()
            .flatMap(d -> getPairStream(d.getAlias(), d.getName(), d.getBizName()));
    // Metric pairs come first: the merge function keeps the first occurrence,
    // which reproduces the original "metrics overwrite dimensions" semantics.
    return Stream.concat(metricPairs, dimensionPairs)
            .collect(Collectors.toMap(Pair::getLeft, Pair::getRight, (first, second) -> first));
}
/**
 * Expands one schema item into (name, bizName) pairs: the item's own name plus
 * one pair per alias parsed out of the alias string. Pairs are collected into
 * a set, so duplicate mappings collapse before being streamed out.
 */
private Stream<Pair<String, String>> getPairStream(String aliasStr, String name,
        String bizName) {
    Set<Pair<String, String>> pairs = new HashSet<>();
    pairs.add(Pair.of(name, bizName));
    if (StringUtils.isNotBlank(aliasStr)) {
        SchemaItem.getAliasList(aliasStr)
                .forEach(alias -> pairs.add(Pair.of(alias, bizName)));
    }
    return pairs.stream();
}
}

View File

@@ -1,5 +1,6 @@
package com.tencent.supersonic.headless.core.pojo;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.tencent.supersonic.common.pojo.ColumnOrder;
import com.tencent.supersonic.headless.api.pojo.enums.AggOption;
@@ -9,6 +10,7 @@ import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
import lombok.Data;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@@ -19,22 +21,40 @@ import java.util.stream.Collectors;
@Data
public class OntologyQuery {
private Set<ModelResp> models = Sets.newHashSet();
private Set<MetricSchemaResp> metrics = Sets.newHashSet();
private Set<DimSchemaResp> dimensions = Sets.newHashSet();
private Map<String, ModelResp> modelMap = Maps.newHashMap();
private Map<String, Set<MetricSchemaResp>> metricMap = Maps.newHashMap();
private Map<String, Set<DimSchemaResp>> dimensionMap = Maps.newHashMap();
private Set<String> fields = Sets.newHashSet();
private Long limit;
private List<ColumnOrder> order;
private boolean nativeQuery = true;
private AggOption aggOption = AggOption.NATIVE;
public Set<MetricSchemaResp> getMetricsByModel(Long modelId) {
return metrics.stream().filter(m -> m.getModelId().equals(modelId))
.collect(Collectors.toSet());
public Set<ModelResp> getModels() {
return modelMap.values().stream().collect(Collectors.toSet());
}
public Set<DimSchemaResp> getDimensionsByModel(Long modelId) {
return dimensions.stream().filter(m -> m.getModelId().equals(modelId))
.collect(Collectors.toSet());
/** Flattens the per-model dimension map into one set of all queried dimensions. */
public Set<DimSchemaResp> getDimensions() {
    return dimensionMap.values().stream()
            .flatMap(Set::stream)
            .collect(Collectors.toSet());
}
/** Flattens the per-model metric map into one set of all queried metrics. */
public Set<MetricSchemaResp> getMetrics() {
    return metricMap.values().stream()
            .flatMap(Set::stream)
            .collect(Collectors.toSet());
}
// Returns the queried metrics registered for the given model name.
// NOTE(review): returns null (not an empty set) when the model has no entry in
// metricMap — callers must null-check, as SqlBuilder does.
public Set<MetricSchemaResp> getMetricsByModel(String modelName) {
return metricMap.get(modelName);
}
// Returns the queried dimensions registered for the given model name.
// NOTE(review): returns null (not an empty set) when the model has no entry in
// dimensionMap — callers must null-check, as SqlBuilder does.
public Set<DimSchemaResp> getDimensionsByModel(String modelName) {
return dimensionMap.get(modelName);
}
}

View File

@@ -54,7 +54,9 @@ public class DimExpressionParser implements QueryParser {
for (DimSchemaResp queryDim : queryDimensions) {
queryDim.getFields().addAll(SqlSelectHelper.getFieldsFromExpr(queryDim.getExpr()));
queryFields.addAll(queryDim.getFields());
dim2Expr.put(queryDim.getBizName(), queryDim.getExpr());
if (!queryDim.getBizName().equals(queryDim.getExpr())) {
dim2Expr.put(queryDim.getBizName(), queryDim.getExpr());
}
}
return dim2Expr;

View File

@@ -1,6 +1,5 @@
package com.tencent.supersonic.headless.core.translator.parser;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.tencent.supersonic.common.jsqlparser.SqlReplaceHelper;
import com.tencent.supersonic.common.jsqlparser.SqlSelectFunctionHelper;
@@ -8,10 +7,9 @@ import com.tencent.supersonic.common.jsqlparser.SqlSelectHelper;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.enums.EngineType;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.headless.api.pojo.SchemaItem;
import com.tencent.supersonic.headless.api.pojo.enums.AggOption;
import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp;
import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
import com.tencent.supersonic.headless.api.pojo.response.SemanticSchemaResp;
import com.tencent.supersonic.headless.core.pojo.Ontology;
import com.tencent.supersonic.headless.core.pojo.OntologyQuery;
@@ -20,10 +18,12 @@ import com.tencent.supersonic.headless.core.pojo.SqlQuery;
import com.tencent.supersonic.headless.core.utils.SqlGenerateUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.springframework.stereotype.Component;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* This parser rewrites S2SQL including conversion from metric/dimension name to bizName and build
@@ -40,11 +40,20 @@ public class SqlQueryParser implements QueryParser {
@Override
public void parse(QueryStatement queryStatement) throws Exception {
// build ontologyQuery
SqlQuery sqlQuery = queryStatement.getSqlQuery();
List<String> queryFields = SqlSelectHelper.getAllSelectFields(sqlQuery.getSql());
Ontology ontology = queryStatement.getOntology();
OntologyQuery ontologyQuery = buildOntologyQuery(ontology, queryFields);
queryStatement.setOntologyQuery(ontologyQuery);
AggOption sqlQueryAggOption = getAggOption(sqlQuery.getSql(), ontologyQuery.getMetrics());
ontologyQuery.setAggOption(sqlQueryAggOption);
convertNameToBizName(queryStatement);
rewriteOrderBy(queryStatement);
// fill sqlQuery
SqlQuery sqlQuery = queryStatement.getSqlQuery();
String tableName = SqlSelectHelper.getTableName(sqlQuery.getSql());
if (StringUtils.isEmpty(tableName)) {
return;
@@ -59,28 +68,10 @@ public class SqlQueryParser implements QueryParser {
sqlQuery.setWithAlias(false);
}
// build ontologyQuery
Ontology ontology = queryStatement.getOntology();
List<String> allQueryFields = SqlSelectHelper.getAllSelectFields(sqlQuery.getSql());
OntologyQuery ontologyQuery = new OntologyQuery();
queryStatement.setOntologyQuery(ontologyQuery);
List<MetricSchemaResp> queryMetrics = findQueryMetrics(ontology, allQueryFields);
ontologyQuery.getMetrics().addAll(queryMetrics);
List<DimSchemaResp> queryDimensions = findQueryDimensions(ontology, allQueryFields);
ontologyQuery.getDimensions().addAll(queryDimensions);
List<ModelResp> queryModels = findQueryModels(ontology, queryMetrics, queryDimensions);
ontologyQuery.getModels().addAll(queryModels);
AggOption sqlQueryAggOption = getAggOption(sqlQuery.getSql(), queryMetrics);
ontologyQuery.setAggOption(sqlQueryAggOption);
log.info("parse sqlQuery [{}] ", sqlQuery);
}
private AggOption getAggOption(String sql, List<MetricSchemaResp> metricSchemas) {
private AggOption getAggOption(String sql, Set<MetricSchemaResp> metricSchemas) {
if (SqlSelectFunctionHelper.hasAggregateFunction(sql)) {
return AggOption.AGGREGATION;
}
@@ -113,9 +104,36 @@ public class SqlQueryParser implements QueryParser {
return AggOption.DEFAULT;
}
/**
 * Builds a name/alias -> bizName lookup restricted to the metrics and
 * dimensions actually referenced by the given ontology query. When a name is
 * claimed by both a metric and a dimension, the metric's bizName wins.
 */
private Map<String, String> getNameToBizNameMap(OntologyQuery query) {
    // support fieldName and field alias to bizName
    Stream<Pair<String, String>> metricPairs = query.getMetrics().stream()
            .flatMap(m -> getPairStream(m.getAlias(), m.getName(), m.getBizName()));
    Stream<Pair<String, String>> dimensionPairs = query.getDimensions().stream()
            .flatMap(d -> getPairStream(d.getAlias(), d.getName(), d.getBizName()));
    // Metric pairs come first: the merge function keeps the first occurrence,
    // which reproduces the original "metrics overwrite dimensions" semantics.
    return Stream.concat(metricPairs, dimensionPairs)
            .collect(Collectors.toMap(Pair::getLeft, Pair::getRight, (first, second) -> first));
}
/**
 * Expands one schema item into (name, bizName) pairs: the item's own name plus
 * one pair per alias parsed out of the alias string. Pairs are collected into
 * a set, so duplicate mappings collapse before being streamed out.
 */
private Stream<Pair<String, String>> getPairStream(String aliasStr, String name,
        String bizName) {
    Set<Pair<String, String>> pairs = new HashSet<>();
    pairs.add(Pair.of(name, bizName));
    if (StringUtils.isNotBlank(aliasStr)) {
        SchemaItem.getAliasList(aliasStr)
                .forEach(alias -> pairs.add(Pair.of(alias, bizName)));
    }
    return pairs.stream();
}
private void convertNameToBizName(QueryStatement queryStatement) {
SemanticSchemaResp semanticSchema = queryStatement.getSemanticSchema();
Map<String, String> fieldNameToBizNameMap = semanticSchema.getNameToBizNameMap();
Map<String, String> fieldNameToBizNameMap =
getNameToBizNameMap(queryStatement.getOntologyQuery());
String sql = queryStatement.getSqlQuery().getSql();
log.debug("dataSetId:{},convert name to bizName before:{}", queryStatement.getDataSetId(),
sql);
@@ -136,57 +154,70 @@ public class SqlQueryParser implements QueryParser {
queryStatement.getSqlQuery().setSql(newSql);
}
/**
 * Resolves the given field bizNames (case-insensitively) against the
 * ontology's metrics.
 *
 * @param ontology ontology whose metrics are searched
 * @param bizNames candidate field names taken from the parsed SQL
 * @return the metrics whose bizName matches one of the given names
 */
public List<MetricSchemaResp> findQueryMetrics(Ontology ontology, List<String> bizNames) {
    // Merge function keeps the first metric when two metrics share the same
    // case-folded bizName; the two-arg toMap would throw IllegalStateException.
    Map<String, MetricSchemaResp> metricLowerToNameMap = ontology.getMetrics().stream().collect(
            Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry,
                    (first, second) -> first));
    return bizNames.stream().map(String::toLowerCase)
            .filter(metricLowerToNameMap::containsKey)
            .map(metricLowerToNameMap::get).collect(Collectors.toList());
}
private OntologyQuery buildOntologyQuery(Ontology ontology, List<String> queryFields) {
OntologyQuery ontologyQuery = new OntologyQuery();
Set<String> fields = Sets.newHashSet(queryFields);
/**
 * Resolves the given field bizNames (case-insensitively) against the
 * ontology's dimensions.
 *
 * @param ontology ontology whose dimensions are searched
 * @param bizNames candidate field names taken from the parsed SQL
 * @return the dimensions whose bizName matches one of the given names
 */
public List<DimSchemaResp> findQueryDimensions(Ontology ontology, List<String> bizNames) {
    // Merge function keeps the first dimension when two dimensions share the
    // same case-folded bizName; the two-arg toMap would throw IllegalStateException.
    Map<String, DimSchemaResp> dimLowerToNameMap = ontology.getDimensions().stream().collect(
            Collectors.toMap(entry -> entry.getBizName().toLowerCase(), entry -> entry,
                    (first, second) -> first));
    return bizNames.stream().map(String::toLowerCase)
            .filter(dimLowerToNameMap::containsKey)
            .map(dimLowerToNameMap::get).collect(Collectors.toList());
}
public List<ModelResp> findQueryModels(Ontology ontology, List<MetricSchemaResp> queryMetrics,
List<DimSchemaResp> queryDimensions) {
// first, sort models based on the number of query metrics
Map<String, Integer> modelMetricCount = Maps.newHashMap();
queryMetrics.forEach(m -> {
if (!modelMetricCount.containsKey(m.getModelBizName())) {
modelMetricCount.put(m.getModelBizName(), 1);
} else {
int count = modelMetricCount.get(m.getModelBizName());
modelMetricCount.put(m.getModelBizName(), count + 1);
}
// find belonging model for every querying metrics
ontology.getMetricMap().entrySet().forEach(entry -> {
String modelName = entry.getKey();
entry.getValue().forEach(m -> {
if (fields.contains(m.getName()) || fields.contains(m.getBizName())) {
if (!ontologyQuery.getMetricMap().containsKey(modelName)) {
ontologyQuery.getMetricMap().put(modelName, Sets.newHashSet());
}
ontologyQuery.getModelMap().put(modelName,
ontology.getModelMap().get(modelName));
ontologyQuery.getMetricMap().get(modelName).add(m);
fields.remove(m.getName());
fields.remove(m.getBizName());
}
});
});
List<String> metricsDataModels = modelMetricCount.entrySet().stream()
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
.collect(Collectors.toList());
// second, sort models based on the number of query dimensions
Map<String, Integer> modelDimCount = Maps.newHashMap();
queryDimensions.forEach(m -> {
if (!modelDimCount.containsKey(m.getModelBizName())) {
modelDimCount.put(m.getModelBizName(), 1);
} else {
int count = modelDimCount.get(m.getModelBizName());
modelDimCount.put(m.getModelBizName(), count + 1);
}
});
List<String> dimDataModels = modelDimCount.entrySet().stream()
.sorted(Map.Entry.comparingByValue(Comparator.reverseOrder())).map(e -> e.getKey())
.collect(Collectors.toList());
// first try to find all querying dimensions in the models with querying metrics.
ontology.getDimensionMap().entrySet().stream()
.filter(entry -> ontologyQuery.getMetricMap().containsKey(entry.getKey()))
.forEach(entry -> {
String modelName = entry.getKey();
entry.getValue().forEach(d -> {
if (fields.contains(d.getName()) || fields.contains(d.getBizName())) {
if (!ontologyQuery.getDimensionMap().containsKey(entry.getKey())) {
ontologyQuery.getDimensionMap().put(entry.getKey(),
Sets.newHashSet());
}
ontologyQuery.getModelMap().put(modelName,
ontology.getModelMap().get(modelName));
ontologyQuery.getDimensionMap().get(entry.getKey()).add(d);
fields.remove(d.getName());
fields.remove(d.getBizName());
}
});
});
Set<String> dataModelNames = Sets.newLinkedHashSet();
dataModelNames.addAll(dimDataModels);
dataModelNames.addAll(metricsDataModels);
return dataModelNames.stream().map(bizName -> ontology.getModelMap().get(bizName))
.collect(Collectors.toList());
// if there are still fields not found belonging models, try to find in the models without
// querying metrics.
if (!fields.isEmpty()) {
ontology.getDimensionMap().entrySet().forEach(entry -> {
String modelName = entry.getKey();
if (!ontologyQuery.getDimensionMap().containsKey(modelName)) {
entry.getValue().forEach(d -> {
if (fields.contains(d.getName()) || fields.contains(d.getBizName())) {
if (!ontologyQuery.getDimensionMap().containsKey(modelName)) {
ontologyQuery.getDimensionMap().put(modelName, Sets.newHashSet());
}
ontologyQuery.getModelMap().put(modelName,
ontology.getModelMap().get(modelName));
ontologyQuery.getDimensionMap().get(modelName).add(d);
fields.remove(d.getName());
fields.remove(d.getBizName());
}
});
}
});
}
return ontologyQuery;
}
}

View File

@@ -91,9 +91,9 @@ public class SqlBuilder {
for (int i = 0; i < dataModels.size(); i++) {
final ModelResp dataModel = dataModels.get(i);
final Set<DimSchemaResp> queryDimensions =
ontologyQuery.getDimensionsByModel(dataModel.getId());
ontologyQuery.getDimensionsByModel(dataModel.getName());
final Set<MetricSchemaResp> queryMetrics =
ontologyQuery.getMetricsByModel(dataModel.getId());
ontologyQuery.getMetricsByModel(dataModel.getName());
List<String> primary = new ArrayList<>();
for (Identify identify : dataModel.getIdentifiers()) {
@@ -248,8 +248,12 @@ public class SqlBuilder {
TableView tableView = new TableView();
EngineType engineType = EngineType.fromString(schema.getOntology().getDatabase().getType());
Set<String> queryFields = tableView.getFields();
queryMetrics.stream().forEach(m -> queryFields.addAll(m.getFields()));
queryDimensions.stream().forEach(d -> queryFields.addAll(d.getFields()));
if (Objects.nonNull(queryMetrics)) {
queryMetrics.stream().forEach(m -> queryFields.addAll(m.getFields()));
}
if (Objects.nonNull(queryDimensions)) {
queryDimensions.stream().forEach(d -> queryFields.addAll(d.getFields()));
}
try {
for (String field : queryFields) {

View File

@@ -697,31 +697,31 @@ public class MetricServiceImpl extends ServiceImpl<MetricDOMapper, MetricDO>
queryMetricReq.setDateInfo(null);
}
// 4. set groups
List<String> dimensionBizNames = dimensionResps.stream()
List<String> dimensionNames = dimensionResps.stream()
.filter(entry -> modelCluster.getModelIds().contains(entry.getModelId()))
.filter(entry -> queryMetricReq.getDimensionNames().contains(entry.getName())
|| queryMetricReq.getDimensionNames().contains(entry.getBizName())
|| queryMetricReq.getDimensionIds().contains(entry.getId()))
.map(SchemaItem::getBizName).collect(Collectors.toList());
.map(SchemaItem::getName).collect(Collectors.toList());
QueryStructReq queryStructReq = new QueryStructReq();
DateConf dateInfo = queryMetricReq.getDateInfo();
if (Objects.nonNull(dateInfo) && dateInfo.isGroupByDate()) {
queryStructReq.getGroups().add(dateInfo.getDateField());
}
if (!CollectionUtils.isEmpty(dimensionBizNames)) {
queryStructReq.getGroups().addAll(dimensionBizNames);
if (!CollectionUtils.isEmpty(dimensionNames)) {
queryStructReq.getGroups().addAll(dimensionNames);
}
// 5. set aggregators
List<String> metricBizNames = metricResps.stream()
List<String> metricNames = metricResps.stream()
.filter(entry -> modelCluster.getModelIds().contains(entry.getModelId()))
.map(SchemaItem::getBizName).collect(Collectors.toList());
if (CollectionUtils.isEmpty(metricBizNames)) {
.map(SchemaItem::getName).collect(Collectors.toList());
if (CollectionUtils.isEmpty(metricNames)) {
throw new IllegalArgumentException(
"Invalid input parameters, unable to obtain valid metrics");
}
List<Aggregator> aggregators = new ArrayList<>();
for (String metricBizName : metricBizNames) {
for (String metricBizName : metricNames) {
Aggregator aggregator = new Aggregator();
aggregator.setColumn(metricBizName);
aggregators.add(aggregator);