From c1f9df963cf90f0a9cb2d47ef4c443edc084da7d Mon Sep 17 00:00:00 2001 From: jerryjzhang Date: Sun, 9 Mar 2025 08:31:48 +0800 Subject: [PATCH] (improvement)(headless)Add expr to semantic column. --- .../headless/api/pojo/ModelSchema.java | 6 ++--- ...{ColumnSchema.java => SemanticColumn.java} | 4 +++- .../server/modeller/LLMSemanticModeller.java | 1 + .../server/modeller/RuleSemanticModeller.java | 23 ++++++++++--------- .../server/modeller/SemanticModeller.java | 3 +++ .../headless/server/utils/ModelConverter.java | 21 +++++++++-------- .../headless/SemanticModellerTest.java | 6 ++--- 7 files changed, 36 insertions(+), 28 deletions(-) rename headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/{ColumnSchema.java => SemanticColumn.java} (88%) diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/ModelSchema.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/ModelSchema.java index d1429cad2..a16a02ba9 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/ModelSchema.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/ModelSchema.java @@ -14,11 +14,11 @@ public class ModelSchema { private String description; - private List columnSchemas; + private List semanticColumns; @JsonIgnore - public ColumnSchema getColumnByName(String columnName) { - for (ColumnSchema fieldSchema : columnSchemas) { + public SemanticColumn getColumnByName(String columnName) { + for (SemanticColumn fieldSchema : semanticColumns) { if (fieldSchema.getColumnName().equalsIgnoreCase(columnName)) { return fieldSchema; } diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/ColumnSchema.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SemanticColumn.java similarity index 88% rename from headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/ColumnSchema.java rename to headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SemanticColumn.java index ccc2e4bda..dc31195d3 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/ColumnSchema.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SemanticColumn.java @@ -5,7 +5,7 @@ import com.tencent.supersonic.headless.api.pojo.enums.FieldType; import lombok.Data; @Data -public class ColumnSchema { +public class SemanticColumn { private String columnName; @@ -19,4 +19,6 @@ public class ColumnSchema { private String name; + private String expr; + } diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/LLMSemanticModeller.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/LLMSemanticModeller.java index b601fb252..1f6bb4960 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/LLMSemanticModeller.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/LLMSemanticModeller.java @@ -75,6 +75,7 @@ public class LLMSemanticModeller implements SemanticModeller { if (!chatApp.isPresent() || !chatApp.get().isEnable()) { return; } + List otherDbSchema = getOtherDbSchema(dbSchema, dbSchemas); ModelSchemaExtractor extractor = AiServices.create(ModelSchemaExtractor.class, getChatModel(modelBuildReq)); diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/RuleSemanticModeller.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/RuleSemanticModeller.java index 324c0cb61..e57d8c933 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/RuleSemanticModeller.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/RuleSemanticModeller.java @@ -1,9 +1,9 @@ package com.tencent.supersonic.headless.server.modeller; -import com.tencent.supersonic.headless.api.pojo.ColumnSchema; import com.tencent.supersonic.headless.api.pojo.DBColumn; import com.tencent.supersonic.headless.api.pojo.DbSchema; import com.tencent.supersonic.headless.api.pojo.ModelSchema; +import com.tencent.supersonic.headless.api.pojo.SemanticColumn; import com.tencent.supersonic.headless.api.pojo.request.ModelBuildReq; import lombok.extern.slf4j.Slf4j; @@ -16,19 +16,20 @@ public class RuleSemanticModeller implements SemanticModeller { @Override public void build(DbSchema dbSchema, List dbSchemas, ModelSchema modelSchema, ModelBuildReq modelBuildReq) { - List columnSchemas = + List semanticColumns = dbSchema.getDbColumns().stream().map(this::convert).collect(Collectors.toList()); - modelSchema.setColumnSchemas(columnSchemas); + modelSchema.setSemanticColumns(semanticColumns); } - private ColumnSchema convert(DBColumn dbColumn) { - ColumnSchema columnSchema = new ColumnSchema(); - columnSchema.setName(dbColumn.getColumnName()); - columnSchema.setColumnName(dbColumn.getColumnName()); - columnSchema.setComment(dbColumn.getComment()); - columnSchema.setDataType(dbColumn.getDataType()); - columnSchema.setFiledType(dbColumn.getFieldType()); - return columnSchema; + private SemanticColumn convert(DBColumn dbColumn) { + SemanticColumn semanticColumn = new SemanticColumn(); + semanticColumn.setName(dbColumn.getColumnName()); + semanticColumn.setColumnName(dbColumn.getColumnName()); + semanticColumn.setExpr(dbColumn.getColumnName()); + semanticColumn.setComment(dbColumn.getComment()); + semanticColumn.setDataType(dbColumn.getDataType()); + semanticColumn.setFiledType(dbColumn.getFieldType()); + return semanticColumn; } } diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/SemanticModeller.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/SemanticModeller.java index c8a15cd96..0dcbca514 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/SemanticModeller.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/modeller/SemanticModeller.java @@ -7,6 +7,9 @@ import com.tencent.supersonic.headless.api.pojo.request.ModelBuildReq; import java.util.List; +/** + * A semantic modeler builds semantic-layer schemas from database-layer schemas. + */ public interface SemanticModeller { void build(DbSchema dbSchema, List otherDbSchema, ModelSchema modelSchema, diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/ModelConverter.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/ModelConverter.java index 5cef682aa..b765b842b 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/ModelConverter.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/ModelConverter.java @@ -158,22 +158,23 @@ public class ModelConverter { modelDetail.setTableQuery(String.format("%s.%s", modelBuildReq.getDb(), tableName)); } List fields = new ArrayList<>(); - for (ColumnSchema columnSchema : modelSchema.getColumnSchemas()) { - FieldType fieldType = columnSchema.getFiledType(); - fields.add(new Field(columnSchema.getName(), columnSchema.getDataType())); + for (SemanticColumn semanticColumn : modelSchema.getSemanticColumns()) { + FieldType fieldType = semanticColumn.getFiledType(); + fields.add(new Field(semanticColumn.getName(), semanticColumn.getDataType())); if (getIdentifyType(fieldType) != null) { - Identify identify = new Identify(columnSchema.getName(), - getIdentifyType(fieldType).name(), columnSchema.getColumnName(), 1); + Identify identify = new Identify(semanticColumn.getName(), + getIdentifyType(fieldType).name(), semanticColumn.getColumnName(), 1); modelDetail.getIdentifiers().add(identify); } else if (FieldType.measure.equals(fieldType)) { - Measure measure = new Measure(columnSchema.getName(), columnSchema.getColumnName(), - columnSchema.getColumnName(), columnSchema.getAgg().getOperator(), 1); + Measure measure = + new Measure(semanticColumn.getName(), semanticColumn.getColumnName(), + semanticColumn.getExpr(), semanticColumn.getAgg().getOperator(), 1); modelDetail.getMeasures().add(measure); } else { - Dimension dim = new Dimension(columnSchema.getName(), columnSchema.getColumnName(), - columnSchema.getColumnName(), - DimensionType.valueOf(columnSchema.getFiledType().name()), 1); + Dimension dim = new Dimension(semanticColumn.getName(), + semanticColumn.getColumnName(), semanticColumn.getExpr(), + DimensionType.valueOf(semanticColumn.getFiledType().name()), 1); modelDetail.getDimensions().add(dim); } } diff --git a/launchers/standalone/src/test/java/com/tencent/supersonic/headless/SemanticModellerTest.java b/launchers/standalone/src/test/java/com/tencent/supersonic/headless/SemanticModellerTest.java index b66c58f55..dcd09679f 100644 --- a/launchers/standalone/src/test/java/com/tencent/supersonic/headless/SemanticModellerTest.java +++ b/launchers/standalone/src/test/java/com/tencent/supersonic/headless/SemanticModellerTest.java @@ -39,14 +39,14 @@ public class SemanticModellerTest extends BaseTest { Map modelSchemaMap = modelService.buildModelSchema(modelSchemaReq); ModelSchema userModelSchema = modelSchemaMap.get("s2_user_department"); - Assertions.assertEquals(2, userModelSchema.getColumnSchemas().size()); + Assertions.assertEquals(2, userModelSchema.getSemanticColumns().size()); Assertions.assertEquals(FieldType.primary_key, userModelSchema.getColumnByName("user_name").getFiledType()); Assertions.assertEquals(FieldType.categorical, userModelSchema.getColumnByName("department").getFiledType()); ModelSchema stayTimeModelSchema = modelSchemaMap.get("s2_stay_time_statis"); - Assertions.assertEquals(4, stayTimeModelSchema.getColumnSchemas().size()); + Assertions.assertEquals(4, stayTimeModelSchema.getSemanticColumns().size()); Assertions.assertEquals(FieldType.foreign_key, stayTimeModelSchema.getColumnByName("user_name").getFiledType()); Assertions.assertEquals(FieldType.partition_time, @@ -72,7 +72,7 @@ public class SemanticModellerTest extends BaseTest { Map modelSchemaMap = modelService.buildModelSchema(modelSchemaReq); ModelSchema pvModelSchema = modelSchemaMap.values().iterator().next(); - Assertions.assertEquals(5, pvModelSchema.getColumnSchemas().size()); + Assertions.assertEquals(5, pvModelSchema.getSemanticColumns().size()); Assertions.assertEquals(FieldType.partition_time, pvModelSchema.getColumnByName("imp_date").getFiledType()); Assertions.assertEquals(FieldType.categorical,