[improvement]Use QueryWrapper in place of hard-coded SQL (#1944)

* [improvement][launcher]Use API to get element ID instead of hard-coding it.

* [fix][launcher]Fix MySQL scripts.

* [improvement][launcher]Support DuckDB and refactor the translator code structure.

* [improvement][headless-fe]Revamp the semantic modeling routing interaction and implement switching between dimension and dataset management.

* [improvement][headless]Add table DDL in DbSchema.

* [improvement][headless]Add support for getting a database by type.

* [improvement][headless]Support automatic batch creation of models based on DB table names.

* [improvement][headless]Support getting a domain by bizName.

* [improvement][launcher]Refactor unit tests and demo data.

* [fix][launcher]Change default vector dimension to 512.

* [improvement][dict]Add dimValueAliasMap info to KnowledgeBaseService.

* [improvement][headless]Use QueryWrapper to replace hard-coded SQL in mapper XML.

* [improvement][chat]Introduce ChatMemory to delegate to ChatMemoryDO.

* [fix][common]Fix embedding store sys configs.

* [fix][common]Fix the Postgres schema: use varchar instead of char.

* [improvement][launcher]Change the supersonic Docker deployment from MySQL to Postgres.

* [fix][launcher]Fix a number of issues related to semantic modeling.

* [fix][headless]Fix the evaluation logic of agg type.

* [fix][assembly]Fix the Dockerfile and add a Docker Compose run script.

* [fix][chat]Fix "multiple assignments to same column "similar_queries".

* [improvement][headless]Use LambdaQueryWrapper to avoid hard-coded column names (see the sketch after this list).

* [improvement][headless]Refactor headless infra to support advanced semantic modeling.

* [improvement][headless]Change class name `Dim` to `Dimension`.

* [improvement][chat]Introduce `TimeFieldMapper` to always map the time field.

* [fix][headless]Remove unnecessary dimension existence check.

* [fix][chat]Fix adjusted filters not taking effect.
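To make the thrust of these commits concrete, below is a minimal sketch of the QueryWrapper pattern they apply. Only ChatMemoryDO is named above; the table name, fields, and repository class are hypothetical illustrations, not the actual supersonic schema.

import com.baomidou.mybatisplus.annotation.TableName;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import java.util.List;

@TableName("s2_chat_memory") // hypothetical table name
class ChatMemoryDO {
    private Long id;
    private Long agentId;
    private Integer status;
    public Long getAgentId() { return agentId; }
    public Integer getStatus() { return status; }
}

class ChatMemoryRepository {
    private final BaseMapper<ChatMemoryDO> mapper;

    ChatMemoryRepository(BaseMapper<ChatMemoryDO> mapper) {
        this.mapper = mapper;
    }

    // Before: a hand-written statement in mapper XML, e.g.
    //   SELECT * FROM s2_chat_memory WHERE agent_id = #{agentId} AND status = #{status}
    // After: columns are referenced through getter method references, so a
    // renamed field fails at compile time rather than at runtime.
    List<ChatMemoryDO> findByAgent(Long agentId, Integer status) {
        LambdaQueryWrapper<ChatMemoryDO> wrapper = new LambdaQueryWrapper<ChatMemoryDO>()
                .eq(ChatMemoryDO::getAgentId, agentId)
                // the boolean overload applies the condition only when status is provided
                .eq(status != null, ChatMemoryDO::getStatus, status);
        return mapper.selectList(wrapper);
    }
}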

---------
Jun Zhang, 2024-12-08 13:32:29 +08:00 (committed by GitHub)
parent 0fc29304a8
commit e55f43c737
120 changed files with 844 additions and 5810 deletions

File: BaseDbAdaptor.java

@@ -84,7 +84,7 @@ public abstract class BaseDbAdaptor implements DbAdaptor {
return connection.getMetaData();
}
-    protected static FieldType classifyColumnType(String typeName) {
+    public FieldType classifyColumnType(String typeName) {
switch (typeName.toUpperCase()) {
case "INT":
case "INTEGER":
@@ -101,7 +101,7 @@ public abstract class BaseDbAdaptor implements DbAdaptor {
case "TIMESTAMP":
return FieldType.time;
default:
-                return FieldType.dimension;
+                return FieldType.categorical;
}
}

File: DbAdaptor.java

@@ -1,6 +1,7 @@
package com.tencent.supersonic.headless.core.adaptor.db;
import com.tencent.supersonic.headless.api.pojo.DBColumn;
+import com.tencent.supersonic.headless.api.pojo.enums.FieldType;
import com.tencent.supersonic.headless.core.pojo.ConnectInfo;
import java.sql.SQLException;
@@ -19,4 +20,6 @@ public interface DbAdaptor {
List<DBColumn> getColumns(ConnectInfo connectInfo, String schemaName, String tableName)
throws SQLException;
+    FieldType classifyColumnType(String typeName);
}

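With classifyColumnType promoted onto the DbAdaptor interface (and changed from a static to an instance method), callers can classify raw JDBC type names per engine without downcasting to a concrete adaptor. A hypothetical caller, sketched under the assumption that an adaptor instance is obtained from whatever per-engine lookup supersonic uses:

static FieldType classify(DbAdaptor adaptor, String jdbcTypeName) {
    // per the switch in BaseDbAdaptor above: e.g. "TIMESTAMP" -> FieldType.time,
    // and unrecognized type names now fall back to FieldType.categorical
    // (previously FieldType.dimension)
    return adaptor.classifyColumnType(jdbcTypeName);
}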
File: PostgresqlAdaptor.java

@@ -114,7 +114,8 @@ public class PostgresqlAdaptor extends BaseDbAdaptor {
return dbColumns;
}
-    protected static FieldType classifyColumnType(String typeName) {
+    @Override
+    public FieldType classifyColumnType(String typeName) {
switch (typeName.toUpperCase()) {
case "INT":
case "INTEGER":
@@ -141,7 +142,7 @@ public class PostgresqlAdaptor extends BaseDbAdaptor {
case "CHARACTER":
case "UUID":
default:
-                return FieldType.dimension;
+                return FieldType.categorical;
}
}

File: SqlQueryConverter.java

@@ -61,13 +61,18 @@ public class SqlQueryConverter implements QueryConverter {
List<MetricSchemaResp> metricSchemas = getMetrics(semanticSchemaResp, allFields);
List<String> metrics =
metricSchemas.stream().map(SchemaItem::getBizName).collect(Collectors.toList());
-        AggOption aggOption = getAggOption(sqlQueryParam.getSql(), metricSchemas);
Set<String> dimensions = getDimensions(semanticSchemaResp, allFields);
OntologyQueryParam ontologyQueryParam = new OntologyQueryParam();
ontologyQueryParam.getMetrics().addAll(metrics);
ontologyQueryParam.getDimensions().addAll(dimensions);
-        ontologyQueryParam.setAggOption(aggOption);
-        ontologyQueryParam.setNativeQuery(!AggOption.isAgg(aggOption));
+        AggOption sqlQueryAggOption = getAggOption(sqlQueryParam.getSql(), metricSchemas);
+        // if sql query itself has aggregation, ontology query just returns detail
+        if (sqlQueryAggOption.equals(AggOption.AGGREGATION)) {
+            ontologyQueryParam.setAggOption(AggOption.NATIVE);
+        } else if (sqlQueryAggOption.equals(AggOption.NATIVE) && !metrics.isEmpty()) {
+            ontologyQueryParam.setAggOption(AggOption.DEFAULT);
+        }
+        ontologyQueryParam.setNativeQuery(!AggOption.isAgg(ontologyQueryParam.getAggOption()));
queryStatement.setOntologyQueryParam(ontologyQueryParam);
generateDerivedMetric(sqlGenerateUtils, queryStatement);

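The branch logic above decides how much aggregation the ontology layer should perform based on what the user's SQL already does. A condensed restatement, as an illustrative sketch rather than the production method:

static AggOption ontologyAggOption(AggOption sqlQueryAggOption, boolean hasMetrics) {
    if (sqlQueryAggOption == AggOption.AGGREGATION) {
        // the outer SQL aggregates by itself, so the ontology layer returns detail rows
        return AggOption.NATIVE;
    }
    if (sqlQueryAggOption == AggOption.NATIVE && hasMetrics) {
        // no aggregation in the SQL, but metrics are referenced: let the
        // ontology layer apply each metric's default aggregation
        return AggOption.DEFAULT;
    }
    // otherwise the OntologyQueryParam default applies, which is now NATIVE
    // (see the OntologyQueryParam diff below)
    return AggOption.NATIVE;
}

StructQueryConverter (next file) follows the same principle: a struct query with no metrics becomes a plain detail query (NATIVE), otherwise DEFAULT.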
File: StructQueryConverter.java

@@ -57,7 +57,11 @@ public class StructQueryConverter implements QueryConverter {
.map(Aggregator::getColumn).collect(Collectors.toList()));
String where = sqlGenerateUtils.generateWhere(structQueryParam, null);
ontologyQueryParam.setWhere(where);
-        ontologyQueryParam.setAggOption(AggOption.AGGREGATION);
+        if (ontologyQueryParam.getMetrics().isEmpty()) {
+            ontologyQueryParam.setAggOption(AggOption.NATIVE);
+        } else {
+            ontologyQueryParam.setAggOption(AggOption.DEFAULT);
+        }
ontologyQueryParam.setNativeQuery(structQueryParam.getQueryType().isNativeAggQuery());
ontologyQueryParam.setOrder(structQueryParam.getOrders().stream()
.map(order -> new ColumnOrder(order.getColumn(), order.getDirection()))

File: DataModelNode.java

@@ -26,8 +26,8 @@ public class DataModelNode extends SemanticNode {
sqlTable = dataModel.getSqlQuery();
} else if (dataModel.getTableQuery() != null && !dataModel.getTableQuery().isEmpty()) {
if (dataModel.getType().equalsIgnoreCase(EngineType.POSTGRESQL.getName())) {
-                String fullTableName = Arrays.stream(dataModel.getTableQuery().split("\\."))
-                        .collect(Collectors.joining(".public."));
+                String fullTableName =
+                        String.join(".public.", dataModel.getTableQuery().split("\\."));
sqlTable = "select * from " + fullTableName;
} else {
sqlTable = "select * from " + dataModel.getTableQuery();
@@ -64,7 +64,7 @@ public class DataModelNode extends SemanticNode {
for (Dimension d : datasource.getDimensions()) {
List<SqlNode> identifiers =
expand(SemanticNode.parse(d.getExpr(), scope, engineType), scope);
-            identifiers.stream().forEach(i -> dimensions.add(i.toString()));
+            identifiers.forEach(i -> dimensions.add(i.toString()));
dimensions.add(d.getName());
}
for (Identify i : datasource.getIdentifiers()) {
@@ -73,7 +73,7 @@ public class DataModelNode extends SemanticNode {
for (Measure m : datasource.getMeasures()) {
List<SqlNode> identifiers =
expand(SemanticNode.parse(m.getExpr(), scope, engineType), scope);
-            identifiers.stream().forEach(i -> {
+            identifiers.forEach(i -> {
if (!dimensions.contains(i.toString())) {
metrics.add(i.toString());
}
@@ -127,7 +127,7 @@ public class DataModelNode extends SemanticNode {
}
public static String getNames(List<DataModel> dataModelList) {
-        return dataModelList.stream().map(d -> d.getName()).collect(Collectors.joining("_"));
+        return dataModelList.stream().map(DataModel::getName).collect(Collectors.joining("_"));
}
public static void getQueryDimensionMeasure(Ontology ontology, OntologyQueryParam queryParam,
@@ -138,12 +138,12 @@ public class DataModelNode extends SemanticNode {
: d)
.collect(Collectors.toSet()));
Set<String> schemaMetricName =
-                ontology.getMetrics().stream().map(m -> m.getName()).collect(Collectors.toSet());
+                ontology.getMetrics().stream().map(Metric::getName).collect(Collectors.toSet());
ontology.getMetrics().stream().filter(m -> queryParam.getMetrics().contains(m.getName()))
-                .forEach(m -> m.getMetricTypeParams().getMeasures().stream()
+                .forEach(m -> m.getMetricTypeParams().getMeasures()
.forEach(mm -> queryMeasures.add(mm.getName())));
queryParam.getMetrics().stream().filter(m -> !schemaMetricName.contains(m))
-                .forEach(m -> queryMeasures.add(m));
+                .forEach(queryMeasures::add);
}
public static void mergeQueryFilterDimensionMeasure(Ontology ontology,
@@ -155,13 +155,13 @@ public class DataModelNode extends SemanticNode {
FilterNode.getFilterField(parse(queryParam.getWhere(), scope, engineType),
filterConditions);
Set<String> queryMeasures = new HashSet<>(measures);
-        Set<String> schemaMetricName = ontology.getMetrics().stream().map(m -> m.getName())
-                .collect(Collectors.toSet());
+        Set<String> schemaMetricName =
+                ontology.getMetrics().stream().map(Metric::getName).collect(Collectors.toSet());
for (String filterCondition : filterConditions) {
if (schemaMetricName.contains(filterCondition)) {
ontology.getMetrics().stream()
.filter(m -> m.getName().equalsIgnoreCase(filterCondition))
-                        .forEach(m -> m.getMetricTypeParams().getMeasures().stream()
+                        .forEach(m -> m.getMetricTypeParams().getMeasures()
.forEach(mm -> queryMeasures.add(mm.getName())));
continue;
}
@@ -196,8 +196,8 @@ public class DataModelNode extends SemanticNode {
}
// second, traverse the ontology to find other related dataModels
-        List<DataModel> relatedDataModels = findRelatedModelsByRelation(ontology, baseDataModel,
-                queryDimensions, queryMeasures);
+        List<DataModel> relatedDataModels = findRelatedModelsByRelation(ontology, queryParam,
+                baseDataModel, queryDimensions, queryMeasures);
if (CollectionUtils.isEmpty(relatedDataModels)) {
relatedDataModels = findRelatedModelsByIdentifier(ontology, baseDataModel,
queryDimensions, queryMeasures);
@@ -255,7 +255,7 @@ public class DataModelNode extends SemanticNode {
.collect(Collectors.toSet());
Set<String> baseDimensions = baseDataModel.getDimensions().stream().map(Dimension::getName)
.collect(Collectors.toSet());
-        baseDataModel.getIdentifiers().stream().forEach(i -> baseDimensions.add(i.getName()));
+        baseDataModel.getIdentifiers().forEach(i -> baseDimensions.add(i.getName()));
baseMeasures.retainAll(queryMeasures);
if (baseMeasures.size() < queryMeasures.size()) {
@@ -282,7 +282,8 @@ public class DataModelNode extends SemanticNode {
}
private static List<DataModel> findRelatedModelsByRelation(Ontology ontology,
-            DataModel baseDataModel, Set<String> queryDimensions, Set<String> queryMeasures) {
+            OntologyQueryParam queryParam, DataModel baseDataModel, Set<String> queryDimensions,
+            Set<String> queryMeasures) {
Set<String> joinDataModelNames = new HashSet<>();
List<DataModel> joinDataModels = new ArrayList<>();
Set<String> before = new HashSet<>();
@@ -295,7 +296,7 @@ public class DataModelNode extends SemanticNode {
visitJoinRelations, sortedJoinRelation);
ontology.getJoinRelations().stream()
.filter(j -> !visitJoinRelations.contains(j.getId()))
-                .forEach(j -> sortedJoinRelation.add(j));
+                .forEach(sortedJoinRelation::add);
for (JoinRelation joinRelation : sortedJoinRelation) {
if (!before.contains(joinRelation.getLeft())
&& !before.contains(joinRelation.getRight())) {
@@ -305,13 +306,17 @@ public class DataModelNode extends SemanticNode {
boolean isRight = before.contains(joinRelation.getLeft());
DataModel other = isRight ? ontology.getDataModelMap().get(joinRelation.getRight())
: ontology.getDataModelMap().get(joinRelation.getLeft());
+            String joinDimName = isRight ? joinRelation.getJoinCondition().get(0).getRight()
+                    : joinRelation.getJoinCondition().get(0).getLeft();
if (!queryDimensions.isEmpty()) {
Set<String> linkDimension = other.getDimensions().stream()
-                        .map(dd -> dd.getName()).collect(Collectors.toSet());
-                other.getIdentifiers().stream().forEach(i -> linkDimension.add(i.getName()));
+                        .map(Dimension::getName).collect(Collectors.toSet());
+                other.getIdentifiers().forEach(i -> linkDimension.add(i.getName()));
linkDimension.retainAll(queryDimensions);
if (!linkDimension.isEmpty()) {
isMatch = true;
+                    // joinDim should be added to the query dimension
+                    queryParam.getDimensions().add(joinDimName);
}
}
Set<String> linkMeasure = other.getMeasures().stream().map(Measure::getName)
@@ -322,7 +327,7 @@ public class DataModelNode extends SemanticNode {
}
if (!isMatch && ontology.getDimensionMap().containsKey(other.getName())) {
Set<String> linkDimension = ontology.getDimensionMap().get(other.getName())
-                        .stream().map(dd -> dd.getName()).collect(Collectors.toSet());
+                        .stream().map(Dimension::getName).collect(Collectors.toSet());
linkDimension.retainAll(queryDimensions);
if (!linkDimension.isEmpty()) {
isMatch = true;
@@ -382,15 +387,14 @@ public class DataModelNode extends SemanticNode {
if (entry.getKey().equalsIgnoreCase(baseDataModel.getName())) {
continue;
}
-            Long identifierNum = entry.getValue().getIdentifiers().stream().map(i -> i.getName())
-                    .filter(i -> baseIdentifiers.contains(i)).count();
+            long identifierNum = entry.getValue().getIdentifiers().stream().map(Identify::getName)
+                    .filter(baseIdentifiers::contains).count();
if (identifierNum > 0) {
boolean isMatch = false;
if (!queryDimension.isEmpty()) {
Set<String> linkDimension = entry.getValue().getDimensions().stream()
-                            .map(dd -> dd.getName()).collect(Collectors.toSet());
-                    entry.getValue().getIdentifiers().stream()
-                            .forEach(i -> linkDimension.add(i.getName()));
+                            .map(Dimension::getName).collect(Collectors.toSet());
+                    entry.getValue().getIdentifiers().forEach(i -> linkDimension.add(i.getName()));
linkDimension.retainAll(queryDimension);
if (!linkDimension.isEmpty()) {
isMatch = true;
@@ -398,7 +402,7 @@ public class DataModelNode extends SemanticNode {
}
if (!measures.isEmpty()) {
Set<String> linkMeasure = entry.getValue().getMeasures().stream()
-                            .map(mm -> mm.getName()).collect(Collectors.toSet());
+                            .map(Measure::getName).collect(Collectors.toSet());
linkMeasure.retainAll(measures);
if (!linkMeasure.isEmpty()) {
isMatch = true;

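Most of the DataModelNode edits are behavior-neutral cleanups (method references instead of x -> x.getName() lambdas, dropping redundant .stream() before forEach). The two functional changes are the simplified PostgreSQL table qualification and the propagation of the matched join dimension (joinDimName) into the query dimensions, presumably so the join key is carried into the generated query. A quick sketch of what the new table-name construction produces, with an illustrative value:

String tableQuery = "my_db.my_table"; // hypothetical "database.table" reference
String fullTableName = String.join(".public.", tableQuery.split("\\."));
// fullTableName == "my_db.public.my_table": the default "public" schema is
// spliced between the database and table segments, equivalent to the previous
// Arrays.stream(...).collect(Collectors.joining(".public.")) pipeline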
File: OntologyQueryParam.java

@@ -15,6 +15,6 @@ public class OntologyQueryParam {
private String where;
private Long limit;
private List<ColumnOrder> order;
-    private boolean nativeQuery = false;
-    private AggOption aggOption = AggOption.DEFAULT;
+    private boolean nativeQuery = true;
+    private AggOption aggOption = AggOption.NATIVE;
}

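With both defaults flipped, a freshly constructed OntologyQueryParam now describes a non-aggregated detail query unless a converter explicitly opts into aggregation. A sketch, assuming Lombok-generated accessors on the class:

OntologyQueryParam param = new OntologyQueryParam();
assert param.isNativeQuery();                     // previously false
assert param.getAggOption() == AggOption.NATIVE;  // previously AggOption.DEFAULT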
File: SysTimeDimensionBuilder.java

@@ -1,7 +1,7 @@
package com.tencent.supersonic.headless.core.utils;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
-import com.tencent.supersonic.headless.api.pojo.Dim;
+import com.tencent.supersonic.headless.api.pojo.Dimension;
import com.tencent.supersonic.headless.api.pojo.DimensionTimeTypeParams;
import com.tencent.supersonic.headless.api.pojo.enums.DimensionType;
import com.tencent.supersonic.headless.core.adaptor.db.DbAdaptor;
@@ -20,11 +20,11 @@ public class SysTimeDimensionBuilder {
Pattern.compile("\\b(DATE|TIME|TIMESTAMP|YEAR|MONTH|DAY|HOUR|MINUTE|SECOND)\\b",
Pattern.CASE_INSENSITIVE);
-    public static void addSysTimeDimension(List<Dim> dims, DbAdaptor engineAdaptor) {
+    public static void addSysTimeDimension(List<Dimension> dims, DbAdaptor engineAdaptor) {
log.debug("addSysTimeDimension before:{}, engineAdaptor:{}", dims, engineAdaptor);
-        Dim timeDim = getTimeDim(dims);
+        Dimension timeDim = getTimeDim(dims);
if (timeDim == null) {
-            timeDim = Dim.getDefault();
+            timeDim = Dimension.getDefault();
            // TODO: handle the case where no time dimension is found
return;
}
@@ -34,8 +34,8 @@ public class SysTimeDimensionBuilder {
log.debug("addSysTimeDimension after:{}, engineAdaptor:{}", dims, engineAdaptor);
}
-    private static Dim generateSysDayDimension(Dim timeDim, DbAdaptor engineAdaptor) {
-        Dim dim = new Dim();
+    private static Dimension generateSysDayDimension(Dimension timeDim, DbAdaptor engineAdaptor) {
+        Dimension dim = new Dimension();
dim.setBizName(TimeDimensionEnum.DAY.getName());
dim.setType(DimensionType.partition_time);
dim.setExpr(generateTimeExpr(timeDim, TimeDimensionEnum.DAY.name().toLowerCase(),
@@ -47,8 +47,8 @@ public class SysTimeDimensionBuilder {
return dim;
}
-    private static Dim generateSysWeekDimension(Dim timeDim, DbAdaptor engineAdaptor) {
-        Dim dim = new Dim();
+    private static Dimension generateSysWeekDimension(Dimension timeDim, DbAdaptor engineAdaptor) {
+        Dimension dim = new Dimension();
dim.setBizName(TimeDimensionEnum.WEEK.getName());
dim.setType(DimensionType.partition_time);
dim.setExpr(generateTimeExpr(timeDim, TimeDimensionEnum.WEEK.name().toLowerCase(),
@@ -60,8 +60,8 @@ public class SysTimeDimensionBuilder {
return dim;
}
-    private static Dim generateSysMonthDimension(Dim timeDim, DbAdaptor engineAdaptor) {
-        Dim dim = new Dim();
+    private static Dimension generateSysMonthDimension(Dimension timeDim, DbAdaptor engineAdaptor) {
+        Dimension dim = new Dimension();
dim.setBizName(TimeDimensionEnum.MONTH.getName());
dim.setType(DimensionType.partition_time);
dim.setExpr(generateTimeExpr(timeDim, TimeDimensionEnum.MONTH.name().toLowerCase(),
@@ -79,7 +79,8 @@ public class SysTimeDimensionBuilder {
}
    // Check whether the time field contains keywords, then generate the time expression
-    private static String generateTimeExpr(Dim timeDim, String dateType, DbAdaptor engineAdaptor) {
+    private static String generateTimeExpr(Dimension timeDim, String dateType,
+            DbAdaptor engineAdaptor) {
String bizName = timeDim.getBizName();
String dateFormat = timeDim.getDateFormat();
if (containsTimeKeyword(bizName)) {
@@ -90,8 +91,8 @@ public class SysTimeDimensionBuilder {
}
}
-    private static Dim getTimeDim(List<Dim> timeDims) {
-        for (Dim dim : timeDims) {
+    private static Dimension getTimeDim(List<Dimension> timeDims) {
+        for (Dimension dim : timeDims) {
if (dim.getType().equals(DimensionType.partition_time)) {
return dim;
}
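A hypothetical usage of the renamed Dimension type with this builder; the field values are illustrative, and the generated day/week/month dimensions take their bizNames from TimeDimensionEnum:

List<Dimension> dims = new ArrayList<>();
Dimension orderDate = new Dimension();
orderDate.setBizName("order_date");              // assumed source time field
orderDate.setType(DimensionType.partition_time); // marks it as the time dimension
dims.add(orderDate);

// engineAdaptor would be the DbAdaptor for the model's engine (assumed here);
// judging from the before/after debug logs above, the builder appends the
// generated system time dimensions to dims in place
SysTimeDimensionBuilder.addSysTimeDimension(dims, engineAdaptor);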