(improvement)(headless) Add expr to semantic column.

This commit is contained in:
jerryjzhang
2025-03-09 08:31:48 +08:00
parent 954aa4eea5
commit c1f9df963c
7 changed files with 36 additions and 28 deletions

View File

@@ -14,11 +14,11 @@ public class ModelSchema {
private String description; private String description;
private List<ColumnSchema> columnSchemas; private List<SemanticColumn> semanticColumns;
@JsonIgnore @JsonIgnore
public ColumnSchema getColumnByName(String columnName) { public SemanticColumn getColumnByName(String columnName) {
for (ColumnSchema fieldSchema : columnSchemas) { for (SemanticColumn fieldSchema : semanticColumns) {
if (fieldSchema.getColumnName().equalsIgnoreCase(columnName)) { if (fieldSchema.getColumnName().equalsIgnoreCase(columnName)) {
return fieldSchema; return fieldSchema;
} }

View File

@@ -5,7 +5,7 @@ import com.tencent.supersonic.headless.api.pojo.enums.FieldType;
import lombok.Data; import lombok.Data;
@Data @Data
public class ColumnSchema { public class SemanticColumn {
private String columnName; private String columnName;
@@ -19,4 +19,6 @@ public class ColumnSchema {
private String name; private String name;
private String expr;
} }

View File

@@ -75,6 +75,7 @@ public class LLMSemanticModeller implements SemanticModeller {
if (!chatApp.isPresent() || !chatApp.get().isEnable()) { if (!chatApp.isPresent() || !chatApp.get().isEnable()) {
return; return;
} }
List<DbSchema> otherDbSchema = getOtherDbSchema(dbSchema, dbSchemas); List<DbSchema> otherDbSchema = getOtherDbSchema(dbSchema, dbSchemas);
ModelSchemaExtractor extractor = ModelSchemaExtractor extractor =
AiServices.create(ModelSchemaExtractor.class, getChatModel(modelBuildReq)); AiServices.create(ModelSchemaExtractor.class, getChatModel(modelBuildReq));

View File

@@ -1,9 +1,9 @@
package com.tencent.supersonic.headless.server.modeller; package com.tencent.supersonic.headless.server.modeller;
import com.tencent.supersonic.headless.api.pojo.ColumnSchema;
import com.tencent.supersonic.headless.api.pojo.DBColumn; import com.tencent.supersonic.headless.api.pojo.DBColumn;
import com.tencent.supersonic.headless.api.pojo.DbSchema; import com.tencent.supersonic.headless.api.pojo.DbSchema;
import com.tencent.supersonic.headless.api.pojo.ModelSchema; import com.tencent.supersonic.headless.api.pojo.ModelSchema;
import com.tencent.supersonic.headless.api.pojo.SemanticColumn;
import com.tencent.supersonic.headless.api.pojo.request.ModelBuildReq; import com.tencent.supersonic.headless.api.pojo.request.ModelBuildReq;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@@ -16,19 +16,20 @@ public class RuleSemanticModeller implements SemanticModeller {
@Override @Override
public void build(DbSchema dbSchema, List<DbSchema> dbSchemas, ModelSchema modelSchema, public void build(DbSchema dbSchema, List<DbSchema> dbSchemas, ModelSchema modelSchema,
ModelBuildReq modelBuildReq) { ModelBuildReq modelBuildReq) {
List<ColumnSchema> columnSchemas = List<SemanticColumn> semanticColumns =
dbSchema.getDbColumns().stream().map(this::convert).collect(Collectors.toList()); dbSchema.getDbColumns().stream().map(this::convert).collect(Collectors.toList());
modelSchema.setColumnSchemas(columnSchemas); modelSchema.setSemanticColumns(semanticColumns);
} }
private ColumnSchema convert(DBColumn dbColumn) { private SemanticColumn convert(DBColumn dbColumn) {
ColumnSchema columnSchema = new ColumnSchema(); SemanticColumn semanticColumn = new SemanticColumn();
columnSchema.setName(dbColumn.getColumnName()); semanticColumn.setName(dbColumn.getColumnName());
columnSchema.setColumnName(dbColumn.getColumnName()); semanticColumn.setColumnName(dbColumn.getColumnName());
columnSchema.setComment(dbColumn.getComment()); semanticColumn.setExpr(dbColumn.getColumnName());
columnSchema.setDataType(dbColumn.getDataType()); semanticColumn.setComment(dbColumn.getComment());
columnSchema.setFiledType(dbColumn.getFieldType()); semanticColumn.setDataType(dbColumn.getDataType());
return columnSchema; semanticColumn.setFiledType(dbColumn.getFieldType());
return semanticColumn;
} }
} }

View File

@@ -7,6 +7,9 @@ import com.tencent.supersonic.headless.api.pojo.request.ModelBuildReq;
import java.util.List; import java.util.List;
/**
* A semantic modeller builds semantic-layer schemas from database-layer schemas.
*/
public interface SemanticModeller { public interface SemanticModeller {
void build(DbSchema dbSchema, List<DbSchema> otherDbSchema, ModelSchema modelSchema, void build(DbSchema dbSchema, List<DbSchema> otherDbSchema, ModelSchema modelSchema,

View File

@@ -158,22 +158,23 @@ public class ModelConverter {
modelDetail.setTableQuery(String.format("%s.%s", modelBuildReq.getDb(), tableName)); modelDetail.setTableQuery(String.format("%s.%s", modelBuildReq.getDb(), tableName));
} }
List<Field> fields = new ArrayList<>(); List<Field> fields = new ArrayList<>();
for (ColumnSchema columnSchema : modelSchema.getColumnSchemas()) { for (SemanticColumn semanticColumn : modelSchema.getSemanticColumns()) {
FieldType fieldType = columnSchema.getFiledType(); FieldType fieldType = semanticColumn.getFiledType();
fields.add(new Field(columnSchema.getName(), columnSchema.getDataType())); fields.add(new Field(semanticColumn.getName(), semanticColumn.getDataType()));
if (getIdentifyType(fieldType) != null) { if (getIdentifyType(fieldType) != null) {
Identify identify = new Identify(columnSchema.getName(), Identify identify = new Identify(semanticColumn.getName(),
getIdentifyType(fieldType).name(), columnSchema.getColumnName(), 1); getIdentifyType(fieldType).name(), semanticColumn.getColumnName(), 1);
modelDetail.getIdentifiers().add(identify); modelDetail.getIdentifiers().add(identify);
} else if (FieldType.measure.equals(fieldType)) { } else if (FieldType.measure.equals(fieldType)) {
Measure measure = new Measure(columnSchema.getName(), columnSchema.getColumnName(), Measure measure =
columnSchema.getColumnName(), columnSchema.getAgg().getOperator(), 1); new Measure(semanticColumn.getName(), semanticColumn.getColumnName(),
semanticColumn.getExpr(), semanticColumn.getAgg().getOperator(), 1);
modelDetail.getMeasures().add(measure); modelDetail.getMeasures().add(measure);
} else { } else {
Dimension dim = new Dimension(columnSchema.getName(), columnSchema.getColumnName(), Dimension dim = new Dimension(semanticColumn.getName(),
columnSchema.getColumnName(), semanticColumn.getColumnName(), semanticColumn.getExpr(),
DimensionType.valueOf(columnSchema.getFiledType().name()), 1); DimensionType.valueOf(semanticColumn.getFiledType().name()), 1);
modelDetail.getDimensions().add(dim); modelDetail.getDimensions().add(dim);
} }
} }

View File

@@ -39,14 +39,14 @@ public class SemanticModellerTest extends BaseTest {
Map<String, ModelSchema> modelSchemaMap = modelService.buildModelSchema(modelSchemaReq); Map<String, ModelSchema> modelSchemaMap = modelService.buildModelSchema(modelSchemaReq);
ModelSchema userModelSchema = modelSchemaMap.get("s2_user_department"); ModelSchema userModelSchema = modelSchemaMap.get("s2_user_department");
Assertions.assertEquals(2, userModelSchema.getColumnSchemas().size()); Assertions.assertEquals(2, userModelSchema.getSemanticColumns().size());
Assertions.assertEquals(FieldType.primary_key, Assertions.assertEquals(FieldType.primary_key,
userModelSchema.getColumnByName("user_name").getFiledType()); userModelSchema.getColumnByName("user_name").getFiledType());
Assertions.assertEquals(FieldType.categorical, Assertions.assertEquals(FieldType.categorical,
userModelSchema.getColumnByName("department").getFiledType()); userModelSchema.getColumnByName("department").getFiledType());
ModelSchema stayTimeModelSchema = modelSchemaMap.get("s2_stay_time_statis"); ModelSchema stayTimeModelSchema = modelSchemaMap.get("s2_stay_time_statis");
Assertions.assertEquals(4, stayTimeModelSchema.getColumnSchemas().size()); Assertions.assertEquals(4, stayTimeModelSchema.getSemanticColumns().size());
Assertions.assertEquals(FieldType.foreign_key, Assertions.assertEquals(FieldType.foreign_key,
stayTimeModelSchema.getColumnByName("user_name").getFiledType()); stayTimeModelSchema.getColumnByName("user_name").getFiledType());
Assertions.assertEquals(FieldType.partition_time, Assertions.assertEquals(FieldType.partition_time,
@@ -72,7 +72,7 @@ public class SemanticModellerTest extends BaseTest {
Map<String, ModelSchema> modelSchemaMap = modelService.buildModelSchema(modelSchemaReq); Map<String, ModelSchema> modelSchemaMap = modelService.buildModelSchema(modelSchemaReq);
ModelSchema pvModelSchema = modelSchemaMap.values().iterator().next(); ModelSchema pvModelSchema = modelSchemaMap.values().iterator().next();
Assertions.assertEquals(5, pvModelSchema.getColumnSchemas().size()); Assertions.assertEquals(5, pvModelSchema.getSemanticColumns().size());
Assertions.assertEquals(FieldType.partition_time, Assertions.assertEquals(FieldType.partition_time,
pvModelSchema.getColumnByName("imp_date").getFiledType()); pvModelSchema.getColumnByName("imp_date").getFiledType());
Assertions.assertEquals(FieldType.categorical, Assertions.assertEquals(FieldType.categorical,