From 68d5dac14c05118778dcec25f151c830386b803b Mon Sep 17 00:00:00 2001 From: LXW <1264174498@qq.com> Date: Wed, 30 Oct 2024 21:56:32 +0800 Subject: [PATCH] (improvement)(Headless) Remove unnecessary code (#1863) Co-authored-by: lxwcodemonkey --- .../server/builder/IntelligentBuilder.java | 12 -- .../builder/ModelIntelligentBuilder.java | 113 ------------------ .../server/service/impl/ModelServiceImpl.java | 61 +++------- .../server/service/ModelServiceImplTest.java | 11 +- .../headless/ModelIntelligentBuildTest.java | 91 -------------- 5 files changed, 19 insertions(+), 269 deletions(-) delete mode 100644 headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/IntelligentBuilder.java delete mode 100644 headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/ModelIntelligentBuilder.java delete mode 100644 launchers/standalone/src/test/java/com/tencent/supersonic/headless/ModelIntelligentBuildTest.java diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/IntelligentBuilder.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/IntelligentBuilder.java deleted file mode 100644 index d235f51ed..000000000 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/IntelligentBuilder.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.tencent.supersonic.headless.server.builder; - -import com.tencent.supersonic.common.pojo.ChatModelConfig; -import dev.langchain4j.model.chat.ChatLanguageModel; -import dev.langchain4j.provider.ModelProvider; - -public abstract class IntelligentBuilder { - - protected ChatLanguageModel getChatModel(ChatModelConfig chatModelConfig) { - return ModelProvider.getChatModel(chatModelConfig); - } -} diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/ModelIntelligentBuilder.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/ModelIntelligentBuilder.java deleted file mode 100644 index c3730be22..000000000 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/builder/ModelIntelligentBuilder.java +++ /dev/null @@ -1,113 +0,0 @@ -package com.tencent.supersonic.headless.server.builder; - -import com.fasterxml.jackson.databind.ObjectMapper; -import com.tencent.supersonic.common.pojo.ChatApp; -import com.tencent.supersonic.common.pojo.ChatModelConfig; -import com.tencent.supersonic.common.pojo.enums.AppModule; -import com.tencent.supersonic.common.util.ChatAppManager; -import com.tencent.supersonic.common.util.JsonUtil; -import com.tencent.supersonic.headless.api.pojo.DbSchema; -import com.tencent.supersonic.headless.api.pojo.ModelSchema; -import com.tencent.supersonic.headless.api.pojo.request.ModelBuildReq; -import dev.langchain4j.model.input.Prompt; -import dev.langchain4j.model.input.PromptTemplate; -import dev.langchain4j.service.AiServices; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.core.io.ClassPathResource; -import org.springframework.stereotype.Component; - -import java.io.InputStream; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -@Slf4j -@Component -public class ModelIntelligentBuilder extends IntelligentBuilder { - - public static final String APP_KEY = "BUILD_DATA_MODEL"; - - private static final String SYS_EXEMPLAR_FILE = "s2-buildModel-exemplar.json"; - - public static final String INSTRUCTION = "" - + "Role: As an experienced data analyst with extensive modeling experience, " - + " you are expected to have a deep understanding of data analysis and data modeling concepts." - + "\nJob: You will be given a database table structure, which includes the database table name, field name," - + " field type, and field comments. Your task is to utilize this information for data modeling." - + "\nTask:" - + "\n1. Generate a name and description for the model. Please note, 'bizName' refers to the English name, while 'name' is the Chinese name." - + "\n2. Create a Chinese name for the field and categorize the field into one of the following five types:" - + "\n primary_key: This is a unique identifier for a record row in a database." - + "\n foreign_key: This is a key in a database whose value is derived from the primary key of another table." - + "\n data_time: This represents the time when data is generated in the data warehouse." - + "\n dimension: Usually a string type, used for grouping and filtering data. No need to generate aggregate functions" - + "\n measure: Usually a numeric type, used to quantify data from a certain evaluative perspective. " - + " Also, you need to generate aggregate functions(Eg: MAX, MIN, AVG, SUM, COUNT) for the measure type. " - + "\nTip: I will also give you other related dbSchemas. If you determine that different dbSchemas have the same fields, " - + " they can be primary and foreign key relationships." - + "\nDBSchema: {{DBSchema}}" + "\nOtherRelatedDBSchema: {{otherRelatedDBSchema}}" - + "\nExemplar: {{exemplar}}"; - - private final ObjectMapper objectMapper = JsonUtil.INSTANCE.getObjectMapper(); - - @Value("${s2.model.building.exemplars.enabled:true}") - private Boolean enableExemplarLoading; - - public ModelIntelligentBuilder() { - ChatAppManager.register(APP_KEY, ChatApp.builder().prompt(INSTRUCTION).name("构造数据语义模型") - .appModule(AppModule.HEADLESS).description("通过大模型来构造数据语义模型").enable(true).build()); - } - - interface ModelSchemaExtractor { - ModelSchema generateModelSchema(String text); - } - - - public ModelSchema build(DbSchema dbSchema, List otherDbSchema, - ModelBuildReq modelBuildReq) { - Optional chatApp = ChatAppManager.getApp(APP_KEY); - if (!chatApp.isPresent() || !chatApp.get().isEnable()) { - return null; - } - ChatModelConfig chatModelConfig = modelBuildReq.getChatModelConfig(); - ModelSchemaExtractor extractor = - AiServices.create(ModelSchemaExtractor.class, getChatModel(chatModelConfig)); - Prompt prompt = generatePrompt(dbSchema, otherDbSchema, chatApp.get()); - ModelSchema modelSchema = - extractor.generateModelSchema(prompt.toUserMessage().singleText()); - log.info("dbSchema: {}\n otherRelatedDBSchema:{}\n modelSchema: {}", - JsonUtil.toString(dbSchema), JsonUtil.toString(otherDbSchema), - JsonUtil.toString(modelSchema)); - return modelSchema; - } - - private Prompt generatePrompt(DbSchema dbSchema, List otherDbSchema, - ChatApp chatApp) { - Map variable = new HashMap<>(); - variable.put("exemplar", loadExemplars()); - variable.put("DBSchema", JsonUtil.toString(dbSchema)); - variable.put("otherRelatedDBSchema", JsonUtil.toString(otherDbSchema)); - return PromptTemplate.from(chatApp.getPrompt()).apply(variable); - } - - private String loadExemplars() { - if (!enableExemplarLoading) { - log.info("Not enable load model-building exemplars"); - return ""; - } - try { - ClassPathResource resource = new ClassPathResource(SYS_EXEMPLAR_FILE); - if (resource.exists()) { - InputStream inputStream = resource.getInputStream(); - return objectMapper - .writeValueAsString(objectMapper.readValue(inputStream, Object.class)); - } - } catch (Exception e) { - log.error("Failed to load model-building system exemplars", e); - } - return ""; - } - -} diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/ModelServiceImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/ModelServiceImpl.java index c8189e63e..a3bc247fc 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/ModelServiceImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/ModelServiceImpl.java @@ -2,7 +2,6 @@ package com.tencent.supersonic.headless.server.service.impl; import com.google.common.collect.Lists; import com.tencent.supersonic.auth.api.authentication.service.UserService; -import com.tencent.supersonic.common.config.ChatModel; import com.tencent.supersonic.common.pojo.ItemDateResp; import com.tencent.supersonic.common.pojo.ModelRela; import com.tencent.supersonic.common.pojo.User; @@ -35,7 +34,6 @@ import com.tencent.supersonic.headless.api.pojo.response.DomainResp; import com.tencent.supersonic.headless.api.pojo.response.MetricResp; import com.tencent.supersonic.headless.api.pojo.response.ModelResp; import com.tencent.supersonic.headless.api.pojo.response.UnAvailableItemResp; -import com.tencent.supersonic.headless.server.builder.ModelIntelligentBuilder; import com.tencent.supersonic.headless.server.persistence.dataobject.DateInfoDO; import com.tencent.supersonic.headless.server.persistence.dataobject.ModelDO; import com.tencent.supersonic.headless.server.persistence.repository.DateInfoRepository; @@ -50,14 +48,6 @@ import com.tencent.supersonic.headless.server.service.ModelRelaService; import com.tencent.supersonic.headless.server.service.ModelService; import com.tencent.supersonic.headless.server.utils.ModelConverter; import com.tencent.supersonic.headless.server.utils.NameCheckUtils; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; -import org.springframework.beans.BeanUtils; -import org.springframework.context.annotation.Lazy; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; -import org.springframework.util.CollectionUtils; - import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; @@ -75,6 +65,13 @@ import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.springframework.beans.BeanUtils; +import org.springframework.context.annotation.Lazy; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.CollectionUtils; @Service @Slf4j @@ -96,8 +93,6 @@ public class ModelServiceImpl implements ModelService { private DateInfoRepository dateInfoRepository; - private ModelIntelligentBuilder modelIntelligentBuilder; - private ChatModelService chatModelService; private ModelRelaService modelRelaService; @@ -108,7 +103,7 @@ public class ModelServiceImpl implements ModelService { public ModelServiceImpl(ModelRepository modelRepository, DatabaseService databaseService, @Lazy DimensionService dimensionService, @Lazy MetricService metricService, DomainService domainService, UserService userService, DataSetService dataSetService, - DateInfoRepository dateInfoRepository, ModelIntelligentBuilder modelIntelligentBuilder, + DateInfoRepository dateInfoRepository, ChatModelService chatModelService, ModelRelaService modelRelaService) { this.modelRepository = modelRepository; this.databaseService = databaseService; @@ -118,7 +113,6 @@ public class ModelServiceImpl implements ModelService { this.userService = userService; this.dataSetService = dataSetService; this.dateInfoRepository = dateInfoRepository; - this.modelIntelligentBuilder = modelIntelligentBuilder; this.chatModelService = chatModelService; this.modelRelaService = modelRelaService; } @@ -219,30 +213,21 @@ public class ModelServiceImpl implements ModelService { @Override public Map buildModelSchema(ModelBuildReq modelBuildReq) throws SQLException { - if (modelBuildReq.isBuildByLLM() && modelBuildReq.getChatModelConfig() == null) { - ChatModel chatModel = chatModelService.getChatModel(modelBuildReq.getChatModelId()); - modelBuildReq.setChatModelConfig(chatModel.getConfig()); - } List dbSchemas = getDbSchemes(modelBuildReq); Map modelSchemaMap = new ConcurrentHashMap<>(); CompletableFuture.allOf(dbSchemas.stream() - .map(dbSchema -> CompletableFuture.runAsync( - () -> doBuild(modelBuildReq, dbSchema, dbSchemas, modelSchemaMap), - executor)) + .map(dbSchema -> CompletableFuture.runAsync(() -> + doBuild(dbSchema, modelSchemaMap), executor)) .toArray(CompletableFuture[]::new)).join(); return modelSchemaMap; } - private void doBuild(ModelBuildReq modelBuildReq, DbSchema curSchema, List dbSchemas, - Map modelSchemaMap) { - if (modelBuildReq.isBuildByLLM()) { - List otherDbSchema = getOtherDbSchema(curSchema, dbSchemas); - ModelSchema modelSchema = - modelIntelligentBuilder.build(curSchema, otherDbSchema, modelBuildReq); - modelSchemaMap.put(curSchema.getTable(), modelSchema); - } else { - modelSchemaMap.put(curSchema.getTable(), build(curSchema.getDbColumns())); - } + private void doBuild(DbSchema dbSchema, Map modelSchemaMap) { + ModelSchema modelSchema = new ModelSchema(); + List fieldSchemas = + dbSchema.getDbColumns().stream().map(this::convert).collect(Collectors.toList()); + modelSchema.setFiledSchemas(fieldSchemas); + modelSchemaMap.put(dbSchema.getTable(), modelSchema); } private List getDbSchemes(ModelBuildReq modelBuildReq) throws SQLException { @@ -250,12 +235,6 @@ public class ModelServiceImpl implements ModelService { return convert(dbColumnMap, modelBuildReq); } - private List getOtherDbSchema(DbSchema curSchema, List dbSchemas) { - return dbSchemas.stream() - .filter(dbSchema -> !dbSchema.getTable().equals(curSchema.getTable())) - .collect(Collectors.toList()); - } - private List convert(Map> dbColumnMap, ModelBuildReq modelSchemaReq) { return dbColumnMap.keySet().stream() @@ -281,14 +260,6 @@ public class ModelServiceImpl implements ModelService { return fieldSchema; } - private ModelSchema build(List dbColumns) { - ModelSchema modelSchema = new ModelSchema(); - List fieldSchemas = - dbColumns.stream().map(this::convert).collect(Collectors.toList()); - modelSchema.setFiledSchemas(fieldSchemas); - return modelSchema; - } - private void batchCreateDimension(ModelDO modelDO, User user) throws Exception { List dimensionReqs = ModelConverter.convertDimensionList(modelDO); dimensionService.createDimensionBatch(dimensionReqs, user); diff --git a/headless/server/src/test/java/com/tencent/supersonic/headless/server/service/ModelServiceImplTest.java b/headless/server/src/test/java/com/tencent/supersonic/headless/server/service/ModelServiceImplTest.java index eb8157507..38e2c806b 100644 --- a/headless/server/src/test/java/com/tencent/supersonic/headless/server/service/ModelServiceImplTest.java +++ b/headless/server/src/test/java/com/tencent/supersonic/headless/server/service/ModelServiceImplTest.java @@ -15,19 +15,16 @@ import com.tencent.supersonic.headless.api.pojo.enums.DimensionType; import com.tencent.supersonic.headless.api.pojo.enums.IdentifyType; import com.tencent.supersonic.headless.api.pojo.request.ModelReq; import com.tencent.supersonic.headless.api.pojo.response.ModelResp; -import com.tencent.supersonic.headless.server.builder.ModelIntelligentBuilder; import com.tencent.supersonic.headless.server.persistence.dataobject.ModelDO; import com.tencent.supersonic.headless.server.persistence.repository.DateInfoRepository; import com.tencent.supersonic.headless.server.persistence.repository.ModelRepository; import com.tencent.supersonic.headless.server.service.impl.ModelServiceImpl; import com.tencent.supersonic.headless.server.utils.ModelConverter; +import java.util.ArrayList; +import java.util.List; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.mockito.Mockito; - -import java.util.ArrayList; -import java.util.List; - import static org.mockito.Mockito.when; class ModelServiceImplTest { @@ -77,13 +74,11 @@ class ModelServiceImplTest { UserService userService = Mockito.mock(UserService.class); DateInfoRepository dateInfoRepository = Mockito.mock(DateInfoRepository.class); DataSetService viewService = Mockito.mock(DataSetService.class); - ModelIntelligentBuilder modelIntelligentBuilder = - Mockito.mock(ModelIntelligentBuilder.class); ChatModelService chatModelService = Mockito.mock(ChatModelService.class); ModelRelaService modelRelaService = Mockito.mock(ModelRelaService.class); return new ModelServiceImpl(modelRepository, databaseService, dimensionService, metricService, domainService, userService, viewService, dateInfoRepository, - modelIntelligentBuilder, chatModelService, modelRelaService); + chatModelService, modelRelaService); } private ModelReq mockModelReq() { diff --git a/launchers/standalone/src/test/java/com/tencent/supersonic/headless/ModelIntelligentBuildTest.java b/launchers/standalone/src/test/java/com/tencent/supersonic/headless/ModelIntelligentBuildTest.java deleted file mode 100644 index 11b68f27e..000000000 --- a/launchers/standalone/src/test/java/com/tencent/supersonic/headless/ModelIntelligentBuildTest.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.tencent.supersonic.headless; - - -import com.google.common.collect.Lists; -import com.tencent.supersonic.common.pojo.ChatModelConfig; -import com.tencent.supersonic.common.pojo.enums.AggOperatorEnum; -import com.tencent.supersonic.headless.api.pojo.ModelSchema; -import com.tencent.supersonic.headless.api.pojo.enums.FieldType; -import com.tencent.supersonic.headless.api.pojo.request.ModelBuildReq; -import com.tencent.supersonic.headless.server.service.ModelService; -import com.tencent.supersonic.util.LLMConfigUtils; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.test.context.TestPropertySource; - -import java.sql.SQLException; -import java.util.Map; - -@Disabled -@TestPropertySource(properties = {"s2.model.building.exemplars.enabled = false"}) -public class ModelIntelligentBuildTest extends BaseTest { - - private LLMConfigUtils.LLMType llmType = LLMConfigUtils.LLMType.OLLAMA_LLAMA3; - - @Autowired - private ModelService modelService; - - @Test - public void testBuildModelBatch() throws SQLException { - ChatModelConfig llmConfig = LLMConfigUtils.getLLMConfig(llmType); - ModelBuildReq modelSchemaReq = new ModelBuildReq(); - modelSchemaReq.setChatModelConfig(llmConfig); - modelSchemaReq.setBuildByLLM(true); - modelSchemaReq.setDatabaseId(1L); - modelSchemaReq.setDb("semantic"); - modelSchemaReq.setTables(Lists.newArrayList("s2_user_department", "s2_stay_time_statis")); - Map modelSchemaMap = modelService.buildModelSchema(modelSchemaReq); - - ModelSchema userModelSchema = modelSchemaMap.get("s2_user_department"); - Assertions.assertEquals(2, userModelSchema.getFiledSchemas().size()); - Assertions.assertEquals(FieldType.primary_key, - userModelSchema.getFieldByName("user_name").getFiledType()); - Assertions.assertEquals(FieldType.dimension, - userModelSchema.getFieldByName("department").getFiledType()); - - ModelSchema stayTimeModelSchema = modelSchemaMap.get("s2_stay_time_statis"); - Assertions.assertEquals(4, stayTimeModelSchema.getFiledSchemas().size()); - Assertions.assertEquals(FieldType.foreign_key, - stayTimeModelSchema.getFieldByName("user_name").getFiledType()); - Assertions.assertEquals(FieldType.data_time, - stayTimeModelSchema.getFieldByName("imp_date").getFiledType()); - Assertions.assertEquals(FieldType.dimension, - stayTimeModelSchema.getFieldByName("page").getFiledType()); - Assertions.assertEquals(FieldType.measure, - stayTimeModelSchema.getFieldByName("stay_hours").getFiledType()); - Assertions.assertEquals(AggOperatorEnum.SUM, - stayTimeModelSchema.getFieldByName("stay_hours").getAgg()); - } - - @Test - public void testBuildModelBySql() throws SQLException { - ChatModelConfig llmConfig = LLMConfigUtils.getLLMConfig(llmType); - ModelBuildReq modelSchemaReq = new ModelBuildReq(); - modelSchemaReq.setChatModelConfig(llmConfig); - modelSchemaReq.setBuildByLLM(true); - modelSchemaReq.setDatabaseId(1L); - modelSchemaReq.setDb("semantic"); - modelSchemaReq.setSql( - "SELECT imp_date, user_name, page, 1 as pv, user_name as uv FROM s2_pv_uv_statis"); - Map modelSchemaMap = modelService.buildModelSchema(modelSchemaReq); - - ModelSchema pvModelSchema = modelSchemaMap.values().iterator().next(); - Assertions.assertEquals(5, pvModelSchema.getFiledSchemas().size()); - Assertions.assertEquals(FieldType.data_time, - pvModelSchema.getFieldByName("imp_date").getFiledType()); - Assertions.assertEquals(FieldType.dimension, - pvModelSchema.getFieldByName("user_name").getFiledType()); - Assertions.assertEquals(FieldType.dimension, - pvModelSchema.getFieldByName("page").getFiledType()); - Assertions.assertEquals(FieldType.measure, - pvModelSchema.getFieldByName("pv").getFiledType()); - Assertions.assertEquals(AggOperatorEnum.SUM, pvModelSchema.getFieldByName("pv").getAgg()); - Assertions.assertEquals(FieldType.measure, - pvModelSchema.getFieldByName("uv").getFiledType()); - Assertions.assertEquals(AggOperatorEnum.COUNT_DISTINCT, - pvModelSchema.getFieldByName("uv").getAgg()); - } - -}