From aa433baa06b794c5adc7aee7157635c1a60b9992 Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Fri, 24 Nov 2023 15:59:29 +0800 Subject: [PATCH] [improvement](python) LLM parsing related services support Python service and Java service invocation (#418) --- chat/core/pom.xml | 13 + .../chat/config/OptimizationConfig.java | 13 + .../chat/llm/EmbedLLMInterpreter.java | 83 +++++ .../chat/llm/HttpLLMInterpreter.java | 71 ++++ .../supersonic/chat/llm/LLMInterpreter.java | 18 + .../llm/listener/EmbeddingInitListener.java | 43 +++ .../prompt/FunctionCallPromptGenerator.java | 43 +++ .../chat/llm/prompt/InputFormat.java | 43 +++ .../chat/llm/prompt/OutputFormat.java | 54 +++ .../chat/llm/prompt/SqlExample.java | 32 ++ .../chat/llm/prompt/SqlExampleLoader.java | 50 +++ .../chat/llm/prompt/SqlPromptGenerator.java | 66 ++++ .../llm/vectordb/EmbeddingStoreFactory.java | 20 ++ .../llm/vectordb/EmbeddingStoreOperator.java | 55 +++ .../parser/llm/s2sql/LLMRequestService.java | 9 +- .../embedding/EmbeddingBasedParser.java | 7 +- .../plugin/function/FunctionBasedParser.java | 43 +-- .../chat/service/LLMParserLayer.java | 13 - .../chat/service/impl/LLMParserLayerImpl.java | 47 --- .../chat/utils/ComponentFactory.java | 11 + .../main/resources/META-INF/spring.factories | 3 + launchers/standalone/pom.xml | 14 + .../supersonic/config/LangChain4jConfig.java | 15 + .../main/resources/META-INF/spring.factories | 7 +- .../src/main/resources/application-local.yaml | 15 + .../src/main/resources/example.json | 312 ++++++++++++++++++ pom.xml | 52 +++ .../semantic/model/domain/pojo/Database.java | 5 +- 28 files changed, 1054 insertions(+), 103 deletions(-) create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/EmbedLLMInterpreter.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/HttpLLMInterpreter.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/LLMInterpreter.java create 
mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/listener/EmbeddingInitListener.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/FunctionCallPromptGenerator.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/InputFormat.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/OutputFormat.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExample.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExampleLoader.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlPromptGenerator.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreFactory.java create mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreOperator.java delete mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/service/LLMParserLayer.java delete mode 100644 chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/LLMParserLayerImpl.java create mode 100644 launchers/standalone/src/main/java/com/tencent/supersonic/config/LangChain4jConfig.java create mode 100644 launchers/standalone/src/main/resources/example.json diff --git a/chat/core/pom.xml b/chat/core/pom.xml index 42cc3a8ad..e42d719fc 100644 --- a/chat/core/pom.xml +++ b/chat/core/pom.xml @@ -116,6 +116,19 @@ ${mockito-inline.version} test + + + dev.langchain4j + langchain4j-open-ai + + + dev.langchain4j + langchain4j + + + dev.langchain4j + langchain4j-chroma + diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/config/OptimizationConfig.java b/chat/core/src/main/java/com/tencent/supersonic/chat/config/OptimizationConfig.java index 630e9ec49..e3ac48b10 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/config/OptimizationConfig.java +++ 
b/chat/core/src/main/java/com/tencent/supersonic/chat/config/OptimizationConfig.java @@ -59,6 +59,19 @@ public class OptimizationConfig { @Value("${s2SQL.use.switch:true}") private boolean useS2SqlSwitch; + + @Value("${text2sql.example.num:10}") + private int text2sqlExampleNum; + + @Value("${text2sql.fewShots.num:10}") + private int text2sqlFewShotsNum; + + @Value("${text2sql.self.consistency.num:5}") + private int text2sqlSelfConsistencyNum; + + @Value("${text2sql.collection.name:text2dsl_agent_collection}") + private String text2sqlCollectionName; + @Autowired private SysParameterService sysParameterService; diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/EmbedLLMInterpreter.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/EmbedLLMInterpreter.java new file mode 100644 index 000000000..bb5130a1e --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/EmbedLLMInterpreter.java @@ -0,0 +1,83 @@ +package com.tencent.supersonic.chat.llm; + +import com.tencent.supersonic.chat.config.OptimizationConfig; +import com.tencent.supersonic.chat.llm.prompt.FunctionCallPromptGenerator; +import com.tencent.supersonic.chat.llm.prompt.OutputFormat; +import com.tencent.supersonic.chat.llm.prompt.SqlExampleLoader; +import com.tencent.supersonic.chat.llm.prompt.SqlPromptGenerator; +import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq; +import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp; +import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq; +import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.ElementValue; +import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp; +import com.tencent.supersonic.common.util.ContextUtils; +import com.tencent.supersonic.common.util.JsonUtil; +import dev.langchain4j.data.message.AiMessage; +import dev.langchain4j.model.chat.ChatLanguageModel; +import dev.langchain4j.model.input.Prompt; +import dev.langchain4j.model.input.PromptTemplate; +import 
dev.langchain4j.model.output.Response; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EmbedLLMInterpreter implements LLMInterpreter { + + public LLMResp query2sql(LLMReq llmReq, Long modelId) { + + ChatLanguageModel chatLanguageModel = ContextUtils.getBean(ChatLanguageModel.class); + + SqlExampleLoader sqlExampleLoader = ContextUtils.getBean(SqlExampleLoader.class); + + OptimizationConfig config = ContextUtils.getBean(OptimizationConfig.class); + + List> sqlExamples = sqlExampleLoader.retrieverSqlExamples(llmReq.getQueryText(), + config.getText2sqlCollectionName(), config.getText2sqlFewShotsNum()); + + String queryText = llmReq.getQueryText(); + String modelName = llmReq.getSchema().getModelName(); + List fieldNameList = llmReq.getSchema().getFieldNameList(); + List linking = llmReq.getLinking(); + + SqlPromptGenerator sqlPromptGenerator = ContextUtils.getBean(SqlPromptGenerator.class); + String linkingPromptStr = sqlPromptGenerator.generateSchemaLinkingPrompt(queryText, modelName, fieldNameList, + linking, sqlExamples); + + Prompt linkingPrompt = PromptTemplate.from(JsonUtil.toString(linkingPromptStr)).apply(new HashMap<>()); + Response linkingResult = chatLanguageModel.generate(linkingPrompt.toSystemMessage()); + + String schemaLinkStr = OutputFormat.schemaLinkParse(linkingResult.content().text()); + + String generateSqlPrompt = sqlPromptGenerator.generateSqlPrompt(queryText, modelName, schemaLinkStr, + llmReq.getCurrentDate(), sqlExamples); + + Prompt sqlPrompt = PromptTemplate.from(JsonUtil.toString(generateSqlPrompt)).apply(new HashMap<>()); + Response sqlResult = chatLanguageModel.generate(sqlPrompt.toSystemMessage()); + + LLMResp result = new LLMResp(); + result.setQuery(queryText); + result.setSchemaLinkingOutput(linkingPromptStr); + result.setSchemaLinkStr(schemaLinkStr); + result.setModelName(modelName); + result.setSqlOutput(sqlResult.content().text()); + return result; 
+ } + + @Override + public FunctionResp requestFunction(FunctionReq functionReq) { + + FunctionCallPromptGenerator promptGenerator = ContextUtils.getBean(FunctionCallPromptGenerator.class); + + String functionCallPrompt = promptGenerator.generateFunctionCallPrompt(functionReq.getQueryText(), + functionReq.getPluginConfigs()); + + ChatLanguageModel chatLanguageModel = ContextUtils.getBean(ChatLanguageModel.class); + + String functionSelect = chatLanguageModel.generate(functionCallPrompt); + + return OutputFormat.functionCallParse(functionSelect); + } + +} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/HttpLLMInterpreter.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/HttpLLMInterpreter.java new file mode 100644 index 000000000..9613bcdea --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/HttpLLMInterpreter.java @@ -0,0 +1,71 @@ +package com.tencent.supersonic.chat.llm; + +import com.alibaba.fastjson.JSON; +import com.tencent.supersonic.chat.config.LLMParserConfig; +import com.tencent.supersonic.chat.parser.plugin.function.FunctionCallConfig; +import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq; +import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp; +import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq; +import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp; +import com.tencent.supersonic.common.util.ContextUtils; +import com.tencent.supersonic.common.util.JsonUtil; +import java.net.URI; +import java.net.URL; +import lombok.extern.slf4j.Slf4j; +import org.springframework.http.HttpEntity; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpMethod; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.client.RestTemplate; +import org.springframework.web.util.UriComponentsBuilder; + +@Slf4j +public class HttpLLMInterpreter implements LLMInterpreter { + + public 
LLMResp query2sql(LLMReq llmReq, Long modelId) { + + long startTime = System.currentTimeMillis(); + log.info("requestLLM request, modelId:{},llmReq:{}", modelId, llmReq); + try { + LLMParserConfig llmParserConfig = ContextUtils.getBean(LLMParserConfig.class); + + URL url = new URL(new URL(llmParserConfig.getUrl()), llmParserConfig.getQueryToSqlPath()); + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity entity = new HttpEntity<>(JsonUtil.toString(llmReq), headers); + RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class); + ResponseEntity responseEntity = restTemplate.exchange(url.toString(), HttpMethod.POST, entity, + LLMResp.class); + + log.info("requestLLM response,cost:{}, questUrl:{} \n entity:{} \n body:{}", + System.currentTimeMillis() - startTime, url, entity, responseEntity.getBody()); + return responseEntity.getBody(); + } catch (Exception e) { + log.error("requestLLM error", e); + } + return null; + } + + public FunctionResp requestFunction(FunctionReq functionReq) { + FunctionCallConfig functionCallInfoConfig = ContextUtils.getBean(FunctionCallConfig.class); + String url = functionCallInfoConfig.getUrl() + functionCallInfoConfig.getPluginSelectPath(); + HttpHeaders headers = new HttpHeaders(); + long startTime = System.currentTimeMillis(); + headers.setContentType(MediaType.APPLICATION_JSON); + HttpEntity entity = new HttpEntity<>(JSON.toJSONString(functionReq), headers); + URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri(); + RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class); + try { + log.info("requestFunction functionReq:{}", JsonUtil.toString(functionReq)); + ResponseEntity responseEntity = restTemplate.exchange(requestUrl, HttpMethod.POST, entity, + FunctionResp.class); + log.info("requestFunction responseEntity:{},cost:{}", responseEntity, + System.currentTimeMillis() - startTime); + return responseEntity.getBody(); + } catch 
(Exception e) { + log.error("requestFunction error", e); + } + return null; + } +} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/LLMInterpreter.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/LLMInterpreter.java new file mode 100644 index 000000000..701da8d54 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/LLMInterpreter.java @@ -0,0 +1,18 @@ +package com.tencent.supersonic.chat.llm; + +import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq; +import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp; +import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq; +import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp; + +/** + * Unified interpreter for invoking the llm layer. + */ +public interface LLMInterpreter { + + + LLMResp query2sql(LLMReq llmReq, Long modelId); + + FunctionResp requestFunction(FunctionReq functionReq); + +} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/listener/EmbeddingInitListener.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/listener/EmbeddingInitListener.java new file mode 100644 index 000000000..8ca5d1ff0 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/listener/EmbeddingInitListener.java @@ -0,0 +1,43 @@ +package com.tencent.supersonic.chat.llm.listener; + +import com.tencent.supersonic.chat.config.OptimizationConfig; +import com.tencent.supersonic.chat.llm.EmbedLLMInterpreter; +import com.tencent.supersonic.chat.llm.LLMInterpreter; +import com.tencent.supersonic.chat.llm.prompt.SqlExample; +import com.tencent.supersonic.chat.llm.prompt.SqlExampleLoader; +import com.tencent.supersonic.chat.utils.ComponentFactory; +import java.util.List; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.CommandLineRunner; +import org.springframework.core.annotation.Order; +import 
org.springframework.stereotype.Component; + +@Slf4j +@Component +@Order(4) +public class EmbeddingInitListener implements CommandLineRunner { + + protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter(); + @Autowired + private SqlExampleLoader sqlExampleLoader; + @Autowired + private OptimizationConfig optimizationConfig; + + @Override + public void run(String... args) { + initSqlExamples(); + } + + public void initSqlExamples() { + try { + if (llmInterpreter instanceof EmbedLLMInterpreter) { + List sqlExamples = sqlExampleLoader.getSqlExamples(); + String collectionName = optimizationConfig.getText2sqlCollectionName(); + sqlExampleLoader.addEmbeddingStore(sqlExamples, collectionName); + } + } catch (Exception e) { + log.error("initSqlExamples error", e); + } + } +} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/FunctionCallPromptGenerator.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/FunctionCallPromptGenerator.java new file mode 100644 index 000000000..748e312ec --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/FunctionCallPromptGenerator.java @@ -0,0 +1,43 @@ +package com.tencent.supersonic.chat.llm.prompt; + +import com.tencent.supersonic.chat.plugin.PluginParseConfig; +import java.util.List; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; + +@Component +@Slf4j +public class FunctionCallPromptGenerator { + + public String generateFunctionCallPrompt(String queryText, List toolConfigList) { + List toolExplainList = toolConfigList.stream() + .map(this::constructPluginPrompt) + .collect(Collectors.toList()); + String functionList = String.join(InputFormat.SEPERATOR, toolExplainList); + return constructTaskPrompt(queryText, functionList); + } + + public String constructPluginPrompt(PluginParseConfig parseConfig) { + String toolName = parseConfig.getName(); + String toolDescription = 
parseConfig.getDescription(); + List toolExamples = parseConfig.getExamples(); + + StringBuilder prompt = new StringBuilder(); + prompt.append("【工具名称】\n").append(toolName).append("\n"); + prompt.append("【工具描述】\n").append(toolDescription).append("\n"); + prompt.append("【工具适用问题示例】\n"); + for (String example : toolExamples) { + prompt.append(example).append("\n"); + } + return prompt.toString(); + } + + public String constructTaskPrompt(String queryText, String functionList) { + String instruction = String.format("问题为:%s\n请根据问题和工具的描述,选择对应的工具,完成任务。" + + "请注意,只能选择1个工具。请一步一步地分析选择工具的原因(每个工具的【工具适用问题示例】是选择的重要参考依据)," + + "并给出最终选择,输出格式为json,key为’分析过程‘, ’选择工具‘", queryText); + + return String.format("工具选择如下:\n\n%s\n\n【任务说明】\n%s", functionList, instruction); + } +} \ No newline at end of file diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/InputFormat.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/InputFormat.java new file mode 100644 index 000000000..f80215439 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/InputFormat.java @@ -0,0 +1,43 @@ +package com.tencent.supersonic.chat.llm.prompt; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class InputFormat { + + public static final String SEPERATOR = "\n\n"; + + public static String format(String template, List templateKey, + List> toFormatList) { + List result = new ArrayList<>(); + + for (Map formatItem : toFormatList) { + Map retrievalMeta = subDict(formatItem, templateKey); + result.add(format(template, retrievalMeta)); + } + + return String.join(SEPERATOR, result); + } + + + public static String format(String input, Map replacements) { + for (Map.Entry entry : replacements.entrySet()) { + input = input.replace(entry.getKey(), entry.getValue()); + } + return input; + } + + private static Map subDict(Map dict, List keys) { + Map 
subDict = new HashMap<>(); + for (String key : keys) { + if (dict.containsKey(key)) { + subDict.put(key, dict.get(key)); + } + } + return subDict; + } +} \ No newline at end of file diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/OutputFormat.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/OutputFormat.java new file mode 100644 index 000000000..7ec67aa47 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/OutputFormat.java @@ -0,0 +1,54 @@ +package com.tencent.supersonic.chat.llm.prompt; + +import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp; +import com.tencent.supersonic.common.util.JsonUtil; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import lombok.extern.slf4j.Slf4j; + +/*** + * output format + */ +@Slf4j +public class OutputFormat { + + public static final String PATTERN = "\\{[^{}]+\\}"; + + public static String schemaLinkParse(String schemaLinkOutput) { + try { + schemaLinkOutput = schemaLinkOutput.trim(); + String pattern = "Schema_links:(.*)"; + Pattern regexPattern = Pattern.compile(pattern, Pattern.DOTALL); + Matcher matcher = regexPattern.matcher(schemaLinkOutput); + if (matcher.find()) { + schemaLinkOutput = matcher.group(1).trim(); + } else { + schemaLinkOutput = null; + } + } catch (Exception e) { + log.error("", e); + schemaLinkOutput = null; + } + return schemaLinkOutput; + } + + + public static FunctionResp functionCallParse(String llmOutput) { + try { + String[] findResult = llmOutput.split(PATTERN); + String result = findResult[0].trim(); + + Map resultDict = JsonUtil.toMap(result, String.class, String.class); + log.info("result:{},resultDict:{}", result, resultDict); + + String selection = resultDict.get("选择工具"); + FunctionResp resp = new FunctionResp(); + resp.setToolSelection(selection); + return resp; + } catch (Exception e) { + log.error("", e); + return null; + } + } +} diff --git 
a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExample.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExample.java new file mode 100644 index 000000000..237cee534 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExample.java @@ -0,0 +1,32 @@ +package com.tencent.supersonic.chat.llm.prompt; + +import com.fasterxml.jackson.annotation.JsonProperty; +import lombok.Data; + +@Data +public class SqlExample { + + @JsonProperty("currentDate") + private String currentDate; + + @JsonProperty("tableName") + private String tableName; + + @JsonProperty("fieldsList") + private String fieldsList; + + @JsonProperty("question") + private String question; + + @JsonProperty("priorSchemaLinks") + private String priorSchemaLinks; + + @JsonProperty("analysis") + private String analysis; + + @JsonProperty("schemaLinks") + private String schemaLinks; + + @JsonProperty("sql") + private String sql; +} \ No newline at end of file diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExampleLoader.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExampleLoader.java new file mode 100644 index 000000000..7a0d109ec --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlExampleLoader.java @@ -0,0 +1,50 @@ +package com.tencent.supersonic.chat.llm.prompt; + + +import com.fasterxml.jackson.core.type.TypeReference; +import com.tencent.supersonic.chat.llm.vectordb.EmbeddingStoreOperator; +import com.tencent.supersonic.common.util.JsonUtil; +import dev.langchain4j.data.segment.TextSegment; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.io.ClassPathResource; +import 
org.springframework.stereotype.Component; + +@Slf4j +@Component +public class SqlExampleLoader { + + private static final String EXAMPLE_JSON_FILE = "example.json"; + @Autowired + private EmbeddingStoreOperator embeddingStoreOperator; + private TypeReference> valueTypeRef = new TypeReference>() { + }; + + public List getSqlExamples() throws IOException { + ClassPathResource resource = new ClassPathResource(EXAMPLE_JSON_FILE); + InputStream inputStream = resource.getInputStream(); + return JsonUtil.INSTANCE.getObjectMapper().readValue(inputStream, valueTypeRef); + } + + public void addEmbeddingStore(List sqlExamples, String collectionName) { + embeddingStoreOperator.addAll(sqlExamples, collectionName); + } + + public List> retrieverSqlExamples(String queryText, String collectionName, int maxResults) { + List textSegments = embeddingStoreOperator.retriever(queryText, collectionName, maxResults); + + List> result = new ArrayList<>(); + for (TextSegment textSegment : textSegments) { + if (Objects.nonNull(textSegment.metadata())) { + result.add(textSegment.metadata().asMap()); + } + } + return result; + } +} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlPromptGenerator.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlPromptGenerator.java new file mode 100644 index 000000000..404dda258 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/prompt/SqlPromptGenerator.java @@ -0,0 +1,66 @@ +package com.tencent.supersonic.chat.llm.prompt; + +import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.ElementValue; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; + +@Component +@Slf4j +public class SqlPromptGenerator { + + public String generateSchemaLinkingPrompt(String question, String modelName, List fieldsList, + List priorSchemaLinks, List> exampleList) 
{ + + String exampleTemplate = "Table {tableName}, columns = {fieldsList}, prior_schema_links = {priorSchemaLinks}\n" + + "问题:{question}\n分析:{analysis} 所以Schema_links是:\nSchema_links:{schemaLinks}"; + + List exampleKeys = Arrays.asList("tableName", "fieldsList", "priorSchemaLinks", "question", "analysis", + "schemaLinks"); + + String schemaLinkingPrompt = InputFormat.format(exampleTemplate, exampleKeys, exampleList); + + String newCaseTemplate = "Table {tableName}, columns = {fieldsList}, prior_schema_links = {priorSchemaLinks}\n" + + "问题:{question}\n分析: 让我们一步一步地思考。"; + + String newCasePrompt = newCaseTemplate.replace("{tableName}", modelName) + .replace("{fieldsList}", fieldsList.toString()) + .replace("{priorSchemaLinks}", getPriorSchemaLinks(priorSchemaLinks)) + .replace("{question}", question); + + String instruction = "# 根据数据库的表结构,参考先验信息,找出为每个问题生成SQL查询语句的schema_links"; + return instruction + InputFormat.SEPERATOR + schemaLinkingPrompt + InputFormat.SEPERATOR + newCasePrompt; + } + + private String getPriorSchemaLinks(List priorSchemaLinks) { + return priorSchemaLinks.stream() + .map(elementValue -> "'" + elementValue.getFieldName() + "'->" + elementValue.getFieldValue()) + .collect(Collectors.joining(",", "[", "]")); + } + + public String generateSqlPrompt(String question, String modelName, String schemaLinkStr, String dataDate, + List> exampleList) { + + List exampleKeys = Arrays.asList("question", "currentDate", "tableName", "schemaLinks", "sql"); + String exampleTemplate = "问题:{question}\nCurrent_date:{currentDate}\nTable {tableName}\n" + + "Schema_links:{schemaLinks}\nSQL:{sql}"; + + String sqlExamplePrompt = InputFormat.format(exampleTemplate, exampleKeys, exampleList); + + String newCaseTemplate = "问题:{question}\nCurrent_date:{currentDate}\nTable {tableName}\n" + + "Schema_links:{schemaLinks}\nSQL:"; + + String newCasePrompt = newCaseTemplate.replace("{question}", question) + .replace("{currentDate}", dataDate) + .replace("{tableName}", modelName) + 
.replace("{schemaLinks}", schemaLinkStr); + + String instruction = "# 根据schema_links为每个问题生成SQL查询语句"; + return instruction + InputFormat.SEPERATOR + sqlExamplePrompt + InputFormat.SEPERATOR + newCasePrompt; + } + + +} \ No newline at end of file diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreFactory.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreFactory.java new file mode 100644 index 000000000..c5c17b0d4 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreFactory.java @@ -0,0 +1,20 @@ +package com.tencent.supersonic.chat.llm.vectordb; + +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EmbeddingStoreFactory { + + private static Map collectionNameToStore = new ConcurrentHashMap<>(); + + + public static EmbeddingStore create(String collectionName) { + return collectionNameToStore.computeIfAbsent(collectionName, k -> new InMemoryEmbeddingStore()); + } + + +} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreOperator.java b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreOperator.java new file mode 100644 index 000000000..236f958e2 --- /dev/null +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/llm/vectordb/EmbeddingStoreOperator.java @@ -0,0 +1,55 @@ +package com.tencent.supersonic.chat.llm.vectordb; + +import com.tencent.supersonic.chat.llm.prompt.SqlExample; +import com.tencent.supersonic.common.util.JsonUtil; +import dev.langchain4j.data.document.Metadata; +import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.model.embedding.EmbeddingModel; +import 
dev.langchain4j.retriever.EmbeddingStoreRetriever; +import dev.langchain4j.store.embedding.EmbeddingStore; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +@Service +@Slf4j +public class EmbeddingStoreOperator { + + @Autowired + private EmbeddingModel embeddingModel; + + public List retriever(String text, String collectionName, int maxResults) { + EmbeddingStore embeddingStore = EmbeddingStoreFactory.create(collectionName); + EmbeddingStoreRetriever retriever = EmbeddingStoreRetriever.from(embeddingStore, embeddingModel, maxResults); + return retriever.findRelevant(text); + } + + public List addAll(List sqlExamples, String collectionName) { + List embeddings = new ArrayList<>(); + List textSegments = new ArrayList<>(); + + for (SqlExample sqlExample : sqlExamples) { + String question = sqlExample.getQuestion(); + Embedding embedding = embeddingModel.embed(question).content(); + embeddings.add(embedding); + + Map metaDataMap = JsonUtil.toMap(JsonUtil.toString(sqlExample), String.class, + String.class); + + TextSegment textSegment = TextSegment.from(question, new Metadata(metaDataMap)); + textSegments.add(textSegment); + } + return addAllInternal(embeddings, textSegments, collectionName); + } + + private List addAllInternal(List embeddings, List textSegments, + String collectionName) { + EmbeddingStore embeddingStore = EmbeddingStoreFactory.create(collectionName); + return embeddingStore.addAll(embeddings, textSegments); + } + +} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2sql/LLMRequestService.java b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2sql/LLMRequestService.java index eab046dc9..08f07c86f 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2sql/LLMRequestService.java +++ 
b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/s2sql/LLMRequestService.java @@ -17,7 +17,7 @@ import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq; import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.ElementValue; import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp; import com.tencent.supersonic.chat.service.AgentService; -import com.tencent.supersonic.chat.service.LLMParserLayer; +import com.tencent.supersonic.chat.llm.LLMInterpreter; import com.tencent.supersonic.chat.utils.ComponentFactory; import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum; import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum; @@ -46,6 +46,8 @@ import org.springframework.util.CollectionUtils; @Service public class LLMRequestService { + protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter(); + protected SemanticInterpreter semanticInterpreter = ComponentFactory.getSemanticLayer(); @Autowired private LLMParserConfig llmParserConfig; @@ -55,8 +57,7 @@ public class LLMRequestService { private SchemaService schemaService; @Autowired private OptimizationConfig optimizationConfig; - @Autowired - private LLMParserLayer llmParserLayer; + public boolean check(QueryContext queryCtx) { QueryReq request = queryCtx.getRequest(); @@ -137,7 +138,7 @@ public class LLMRequestService { } public LLMResp requestLLM(LLMReq llmReq, Long modelId) { - return llmParserLayer.query2sql(llmReq, modelId); + return llmInterpreter.query2sql(llmReq, modelId); } protected List getFieldNameList(QueryContext queryCtx, Long modelId, LLMParserConfig llmParserConfig) { diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/embedding/EmbeddingBasedParser.java b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/embedding/EmbeddingBasedParser.java index 403ad5b4c..2359c8915 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/embedding/EmbeddingBasedParser.java +++ 
b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/embedding/EmbeddingBasedParser.java @@ -2,11 +2,14 @@ package com.tencent.supersonic.chat.parser.plugin.embedding; import com.google.common.collect.Lists; import com.tencent.supersonic.chat.api.pojo.QueryContext; +import com.tencent.supersonic.chat.llm.HttpLLMInterpreter; +import com.tencent.supersonic.chat.llm.LLMInterpreter; import com.tencent.supersonic.chat.parser.ParseMode; import com.tencent.supersonic.chat.parser.plugin.PluginParser; import com.tencent.supersonic.chat.plugin.Plugin; import com.tencent.supersonic.chat.plugin.PluginManager; import com.tencent.supersonic.chat.plugin.PluginRecallResult; +import com.tencent.supersonic.chat.utils.ComponentFactory; import com.tencent.supersonic.common.config.EmbeddingConfig; import com.tencent.supersonic.common.util.ContextUtils; import java.util.Comparator; @@ -22,10 +25,12 @@ import org.springframework.util.CollectionUtils; @Slf4j public class EmbeddingBasedParser extends PluginParser { + protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter(); + @Override public boolean checkPreCondition(QueryContext queryContext) { EmbeddingConfig embeddingConfig = ContextUtils.getBean(EmbeddingConfig.class); - if (StringUtils.isBlank(embeddingConfig.getUrl())) { + if (StringUtils.isBlank(embeddingConfig.getUrl()) && llmInterpreter instanceof HttpLLMInterpreter) { return false; } List plugins = getPluginList(queryContext); diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/function/FunctionBasedParser.java b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/function/FunctionBasedParser.java index f3e4460c3..e1513cd83 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/function/FunctionBasedParser.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/plugin/function/FunctionBasedParser.java @@ -1,7 +1,8 @@ package 
com.tencent.supersonic.chat.parser.plugin.function; -import com.alibaba.fastjson.JSON; import com.tencent.supersonic.chat.api.pojo.QueryContext; +import com.tencent.supersonic.chat.llm.HttpLLMInterpreter; +import com.tencent.supersonic.chat.llm.LLMInterpreter; import com.tencent.supersonic.chat.parser.ParseMode; import com.tencent.supersonic.chat.parser.plugin.PluginParser; import com.tencent.supersonic.chat.plugin.Plugin; @@ -10,34 +11,29 @@ import com.tencent.supersonic.chat.plugin.PluginParseConfig; import com.tencent.supersonic.chat.plugin.PluginRecallResult; import com.tencent.supersonic.chat.query.llm.s2sql.S2SQLQuery; import com.tencent.supersonic.chat.service.PluginService; +import com.tencent.supersonic.chat.utils.ComponentFactory; import com.tencent.supersonic.common.util.ContextUtils; -import java.net.URI; +import com.tencent.supersonic.common.util.JsonUtil; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.Set; -import java.util.Objects; import java.util.stream.Collectors; -import com.tencent.supersonic.common.util.JsonUtil; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; -import org.springframework.http.HttpEntity; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpMethod; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; import org.springframework.util.CollectionUtils; -import org.springframework.web.client.RestTemplate; -import org.springframework.web.util.UriComponentsBuilder; @Slf4j public class FunctionBasedParser extends PluginParser { + protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter(); + @Override public boolean checkPreCondition(QueryContext queryContext) { FunctionCallConfig functionCallConfig = ContextUtils.getBean(FunctionCallConfig.class); String functionUrl = functionCallConfig.getUrl(); - if 
(StringUtils.isBlank(functionUrl)) { + if (StringUtils.isBlank(functionUrl) && llmInterpreter instanceof HttpLLMInterpreter) { log.info("functionUrl:{}, skip function parser, queryText:{}", functionUrl, queryContext.getRequest().getQueryText()); return false; @@ -88,7 +84,7 @@ public class FunctionBasedParser extends PluginParser { FunctionReq functionReq = FunctionReq.builder() .queryText(queryContext.getRequest().getQueryText()) .pluginConfigs(pluginToFunctionCall).build(); - functionResp = requestFunction(functionReq); + functionResp = llmInterpreter.requestFunction(functionReq); } return functionResp; } @@ -131,25 +127,4 @@ public class FunctionBasedParser extends PluginParser { return functionDOList; } - public FunctionResp requestFunction(FunctionReq functionReq) { - FunctionCallConfig functionCallInfoConfig = ContextUtils.getBean(FunctionCallConfig.class); - String url = functionCallInfoConfig.getUrl() + functionCallInfoConfig.getPluginSelectPath(); - HttpHeaders headers = new HttpHeaders(); - long startTime = System.currentTimeMillis(); - headers.setContentType(MediaType.APPLICATION_JSON); - HttpEntity entity = new HttpEntity<>(JSON.toJSONString(functionReq), headers); - URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri(); - RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class); - try { - log.info("requestFunction functionReq:{}", JsonUtil.toString(functionReq)); - ResponseEntity responseEntity = restTemplate.exchange(requestUrl, HttpMethod.POST, entity, - FunctionResp.class); - log.info("requestFunction responseEntity:{},cost:{}", responseEntity, - System.currentTimeMillis() - startTime); - return responseEntity.getBody(); - } catch (Exception e) { - log.error("requestFunction error", e); - } - return null; - } } diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/service/LLMParserLayer.java b/chat/core/src/main/java/com/tencent/supersonic/chat/service/LLMParserLayer.java deleted file mode 100644 
index bdbdeeb24..000000000 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/service/LLMParserLayer.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.tencent.supersonic.chat.service; - -import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq; -import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp; - -/** - * Unified wrapper for invoking the llmparser Python service layer. - */ -public interface LLMParserLayer { - - LLMResp query2sql(LLMReq llmReq, Long modelId); - -} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/LLMParserLayerImpl.java b/chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/LLMParserLayerImpl.java deleted file mode 100644 index 51b6266d4..000000000 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/service/impl/LLMParserLayerImpl.java +++ /dev/null @@ -1,47 +0,0 @@ -package com.tencent.supersonic.chat.service.impl; - -import com.tencent.supersonic.chat.config.LLMParserConfig; -import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq; -import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp; -import com.tencent.supersonic.chat.service.LLMParserLayer; -import com.tencent.supersonic.common.util.JsonUtil; -import java.net.URL; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.http.HttpEntity; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpMethod; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Service; -import org.springframework.web.client.RestTemplate; - -@Service -@Slf4j -public class LLMParserLayerImpl implements LLMParserLayer { - - @Autowired - private RestTemplate restTemplate; - @Autowired - private LLMParserConfig llmParserConfig; - - public LLMResp query2sql(LLMReq llmReq, Long modelId) { - long startTime = System.currentTimeMillis(); - log.info("requestLLM request, 
modelId:{},llmReq:{}", modelId, llmReq); - try { - URL url = new URL(new URL(llmParserConfig.getUrl()), llmParserConfig.getQueryToSqlPath()); - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_JSON); - HttpEntity entity = new HttpEntity<>(JsonUtil.toString(llmReq), headers); - ResponseEntity responseEntity = restTemplate.exchange(url.toString(), HttpMethod.POST, entity, - LLMResp.class); - - log.info("requestLLM response,cost:{}, questUrl:{} \n entity:{} \n body:{}", - System.currentTimeMillis() - startTime, url, entity, responseEntity.getBody()); - return responseEntity.getBody(); - } catch (Exception e) { - log.error("requestLLM error", e); - } - return null; - } -} diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/utils/ComponentFactory.java b/chat/core/src/main/java/com/tencent/supersonic/chat/utils/ComponentFactory.java index 296932766..114f8fe1f 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/utils/ComponentFactory.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/utils/ComponentFactory.java @@ -4,6 +4,7 @@ import com.tencent.supersonic.chat.api.component.SchemaMapper; import com.tencent.supersonic.chat.api.component.SemanticCorrector; import com.tencent.supersonic.chat.api.component.SemanticInterpreter; import com.tencent.supersonic.chat.api.component.SemanticParser; +import com.tencent.supersonic.chat.llm.LLMInterpreter; import com.tencent.supersonic.chat.parser.llm.s2sql.ModelResolver; import com.tencent.supersonic.chat.postprocessor.PostProcessor; import com.tencent.supersonic.chat.responder.execute.ExecuteResponder; @@ -20,10 +21,13 @@ public class ComponentFactory { private static List semanticParsers = new ArrayList<>(); private static List s2SQLCorrections = new ArrayList<>(); private static SemanticInterpreter semanticInterpreter; + + private static LLMInterpreter llmInterpreter; private static List postProcessors = new ArrayList<>(); private static List 
parseResponders = new ArrayList<>(); private static List executeResponders = new ArrayList<>(); private static ModelResolver modelResolver; + public static List getSchemaMappers() { return CollectionUtils.isEmpty(schemaMappers) ? init(SchemaMapper.class, schemaMappers) : schemaMappers; } @@ -62,6 +66,13 @@ public class ComponentFactory { } + public static LLMInterpreter getLLMInterpreter() { + if (Objects.isNull(llmInterpreter)) { + llmInterpreter = init(LLMInterpreter.class); + } + return llmInterpreter; + } + public static ModelResolver getModelResolver() { if (Objects.isNull(modelResolver)) { modelResolver = init(ModelResolver.class); diff --git a/launchers/chat/src/main/resources/META-INF/spring.factories b/launchers/chat/src/main/resources/META-INF/spring.factories index 01731b73f..c7c3a7752 100644 --- a/launchers/chat/src/main/resources/META-INF/spring.factories +++ b/launchers/chat/src/main/resources/META-INF/spring.factories @@ -19,6 +19,9 @@ com.tencent.supersonic.chat.api.component.SemanticCorrector=\ com.tencent.supersonic.chat.corrector.GroupByCorrector, \ com.tencent.supersonic.chat.corrector.HavingCorrector +com.tencent.supersonic.chat.llm.LLMInterpreter=\ + com.tencent.supersonic.chat.llm.HttpLLMInterpreter + com.tencent.supersonic.chat.api.component.SemanticInterpreter=\ com.tencent.supersonic.knowledge.semantic.RemoteSemanticInterpreter diff --git a/launchers/standalone/pom.xml b/launchers/standalone/pom.xml index 294cca0fb..09e503399 100644 --- a/launchers/standalone/pom.xml +++ b/launchers/standalone/pom.xml @@ -91,7 +91,21 @@ junit test + + org.springframework.boot + spring-boot-starter-test + test + + + + dev.langchain4j + langchain4j-spring-boot-starter + + + dev.langchain4j + langchain4j-embeddings-all-minilm-l6-v2 + diff --git a/launchers/standalone/src/main/java/com/tencent/supersonic/config/LangChain4jConfig.java b/launchers/standalone/src/main/java/com/tencent/supersonic/config/LangChain4jConfig.java new file mode 100644 index 
000000000..c76957bfc --- /dev/null +++ b/launchers/standalone/src/main/java/com/tencent/supersonic/config/LangChain4jConfig.java @@ -0,0 +1,15 @@ +package com.tencent.supersonic.config; + +import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel; +import dev.langchain4j.model.embedding.EmbeddingModel; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class LangChain4jConfig { + + @Bean + EmbeddingModel embeddingModel() { + return new AllMiniLmL6V2EmbeddingModel(); + } +} \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/META-INF/spring.factories b/launchers/standalone/src/main/resources/META-INF/spring.factories index 98a40f3b6..379827202 100644 --- a/launchers/standalone/src/main/resources/META-INF/spring.factories +++ b/launchers/standalone/src/main/resources/META-INF/spring.factories @@ -19,6 +19,9 @@ com.tencent.supersonic.chat.api.component.SemanticCorrector=\ com.tencent.supersonic.chat.corrector.GroupByCorrector, \ com.tencent.supersonic.chat.corrector.HavingCorrector +com.tencent.supersonic.chat.llm.LLMInterpreter=\ + com.tencent.supersonic.chat.llm.HttpLLMInterpreter + com.tencent.supersonic.chat.api.component.SemanticInterpreter=\ com.tencent.supersonic.knowledge.semantic.LocalSemanticInterpreter @@ -44,4 +47,6 @@ com.tencent.supersonic.chat.responder.parse.ParseResponder=\ com.tencent.supersonic.chat.responder.execute.ExecuteResponder=\ com.tencent.supersonic.chat.responder.execute.EntityInfoExecuteResponder, \ - com.tencent.supersonic.chat.responder.execute.SimilarMetricExecuteResponder \ No newline at end of file + com.tencent.supersonic.chat.responder.execute.SimilarMetricExecuteResponder + +org.springframework.boot.autoconfigure.EnableAutoConfiguration=dev.langchain4j.LangChain4jAutoConfiguration \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/application-local.yaml 
b/launchers/standalone/src/main/resources/application-local.yaml index c9099e384..f33cec827 100644 --- a/launchers/standalone/src/main/resources/application-local.yaml +++ b/launchers/standalone/src/main/resources/application-local.yaml @@ -40,3 +40,18 @@ embedding: url: http://127.0.0.1:9092 functionCall: url: http://127.0.0.1:9092 + + +langchain4j: + chat-model: + provider: open_ai + openai: + api-key: api_key + model-name: gpt-3.5-turbo + temperature: 0.0 + timeout: PT60S + +logging: + level: + dev.langchain4j: DEBUG + dev.ai4j.openai4j: DEBUG \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/example.json b/launchers/standalone/src/main/resources/example.json new file mode 100644 index 000000000..1ef819b32 --- /dev/null +++ b/launchers/standalone/src/main/resources/example.json @@ -0,0 +1,312 @@ +[ + { + "currentDate":"2020-12-01", + "tableName":"内容库产品", + "fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "question":"比较jackjchen和robinlee在内容库的访问次数", + "priorSchemaLinks":"['jackjchen'->用户名, 'robinlee'->用户名]", + "analysis":"让我们一步一步地思考。在问题“比较jackjchen和robinlee在内容库的访问次数“中,我们被问:\n“比较jackjchen和robinlee”,所以我们需要column=[用户名],cell values = ['jackjchen', 'robinlee'],所以有[用户名:('jackjchen', 'robinlee')]\n”内容库的访问次数“,所以我们需要column=[访问次数]", + "schemaLinks":"[\"用户名\":(\"'jackjchen'\", \"'robinlee'\"), \"访问次数\"]", + "sql":"select 用户名, 访问次数 from 内容库产品 where 用户名 in ('jackjchen', 'robinlee')" + }, + { + "currentDate":"2022-11-06", + "tableName":"内容库产品", + "fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "question":"内容库近12个月访问人数 按部门", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“内容库近12个月访问人数 按部门“中,我们被问:\n”内容库近12个月“,所以我们需要column=[数据日期],cell values = [12],所以有[数据日期:(12)]\n“访问人数”,所以我们需要column=[访问人数]\n”按部门“,所以我们需要column=[部门]", + "schemaLinks":"[\"数据日期\":(12), \"访问人数\", \"部门\"]", + "sql":"select 部门, 数据日期, 访问人数 from 内容库产品 where datediff('month', 数据日期, '2022-11-06') <= 12 " + }, + { +
"currentDate":"2023-04-21", + "tableName":"内容库产品", + "fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "question":"内容库美术部、技术研发部的访问时长", + "priorSchemaLinks":"['美术部'->部门, '技术研发部'->部门]", + "analysis":"让我们一步一步地思考。在问题“内容库美术部、技术研发部的访问时长“中,我们被问:\n“访问时长”,所以我们需要column=[访问时长]\n”内容库美术部、技术研发部“,所以我们需要column=[部门], cell values = ['美术部', '技术研发部'],所以有[部门:('美术部', '技术研发部')]", + "schemaLinks":"[\"访问时长\", \"部门\":(\"'美术部'\", \"'技术研发部'\")]", + "sql":"select 部门, 访问时长 from 内容库产品 where 部门 in ('美术部', '技术研发部')" + }, + { + "currentDate":"2023-08-21", + "tableName":"严选", + "fieldsList":"[\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]", + "question":"近3天海田飞系MPPM结算播放份额", + "priorSchemaLinks":"['海田飞系'->严选版权归属系]", + "analysis":"让我们一步一步地思考。在问题“近3天海田飞系MPPM结算播放份额“中,我们被问:\n“MPPM结算播放份额”,所以我们需要column=[结算播放份额], \n”海田飞系“,所以我们需要column=[严选版权归属系], cell values = ['海田飞系'],所以有[严选版权归属系:('海田飞系')],\n”近3天“,所以我们需要column=[数据日期], cell values = [3],所以有[数据日期:(3)]", + "schemaLinks":"[\"结算播放份额\", \"严选版权归属系\":(\"'海田飞系'\"), \"数据日期\":(3)]", + "sql":"select 严选版权归属系, 结算播放份额 from 严选 where 严选版权归属系 = '海田飞系' and datediff('day', 数据日期, '2023-08-21') <= 3 " + }, + { + "currentDate":"2023-05-22", + "tableName":"歌曲库", + "fieldsList":"[\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]", + "question":"对比近7天翻唱版和纯音乐的歌曲播放量", + "priorSchemaLinks":"['纯音乐'->语种, '翻唱版'->歌曲版本]", + "analysis":"让我们一步一步地思考。在问题“对比近7天翻唱版和纯音乐的歌曲播放量“中,我们被问:\n“歌曲播放量”,所以我们需要column=[结算播放量]\n”翻唱版“,所以我们需要column=[歌曲版本], cell values = ['翻唱版'],所以有[歌曲版本:('翻唱版')]\n”和纯音乐的歌曲“,所以我们需要column=[语种], cell values = ['纯音乐'],所以有[语种:('纯音乐')]\n”近7天“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]", + "schemaLinks":"[\"结算播放量\", \"歌曲版本\":(\"'翻唱版'\"), \"语种\":(\"'纯音乐'\"), \"数据日期\":(7)]", + "sql":"select 歌曲版本, 语种, 结算播放量 from 歌曲库 where 歌曲版本 =
'翻唱版' and 语种 = '纯音乐' and datediff('day', 数据日期, '2023-05-22') <= 7 " + }, + { + "currentDate":"2023-05-31", + "tableName":"艺人库", + "fieldsList":"[\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]", + "question":"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数", + "priorSchemaLinks":"['1527896'->MPPM歌手ID, '1565463'->MPPM歌手ID, '2141459'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数“中,我们被问:\n“粉丝数”,所以我们需要column=[粉丝数]\n”陈拙悬、孟梅琦、赖媚韵“,所以我们需要column=[歌手名], cell values = ['陈拙悬', '孟梅琦', '赖媚韵'],所以有[歌手名:('陈拙悬', '孟梅琦', '赖媚韵')]", + "schemaLinks":"[\"粉丝数\", \"歌手名\":(\"'陈拙悬'\", \"'孟梅琦'\", \"'赖媚韵'\")]", + "sql":"select 歌手名, 粉丝数 from 艺人库 where 歌手名 in ('陈拙悬', '孟梅琦', '赖媚韵')" + }, + { + "currentDate":"2023-07-31", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"播放量大于1万的歌曲有多少", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“播放量大于1万的歌曲有多少“中,我们被问:\n“歌曲有多少”,所以我们需要column=[歌曲名]\n”播放量大于1万的“,所以我们需要column=[结算播放量], cell values = [10000],所以有[结算播放量:(10000)]", + "schemaLinks":"[\"歌曲名\", \"结算播放量\":(10000)]", + "sql":"select 歌曲名 from 歌曲库 where 结算播放量 > 10000" + }, + { + "currentDate":"2023-07-31", + "tableName":"内容库产品", + "fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "question":"内容库访问时长小于1小时,且来自美术部的用户是哪些", + "priorSchemaLinks":"['美术部'->部门]", + "analysis":"让我们一步一步地思考。在问题“内容库访问时长小于1小时,且来自美术部的用户是哪些“中,我们被问:\n“用户是哪些”,所以我们需要column=[用户名]\n”美术部的“,所以我们需要column=[部门], cell values = ['美术部'],所以有[部门:('美术部')]\n”访问时长小于1小时“,所以我们需要column=[访问时长], cell values = [1],所以有[访问时长:(1)]", + "schemaLinks":"[\"用户名\", \"部门\":(\"'美术部'\"), \"访问时长\":(1)]", + "sql":"select 用户名 from 
内容库产品 where 部门 = '美术部' and 访问时长 < 1" + }, + { + "currentDate":"2023-08-31", + "tableName":"内容库产品", + "fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "question":"内容库pv最高的用户有哪些", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“内容库pv最高的用户有哪些“中,我们被问:\n“用户有哪些”,所以我们需要column=[用户名]\n”pv最高的“,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)]", + "schemaLinks":"[\"用户名\", \"访问次数\":(1)]", + "sql":"select 用户名 from 内容库产品 order by 访问次数 desc limit 1" + }, + { + "currentDate":"2023-08-31", + "tableName":"艺人库", + "fieldsList":"[\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", + "question":"近90天袁亚伟播放量平均值是多少", + "priorSchemaLinks":"['152789226'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“近90天袁亚伟播放量平均值是多少“中,我们被问:\n“播放量平均值是多少”,所以我们需要column=[结算播放量]\n”袁亚伟“,所以我们需要column=[歌手名], cell values = ['袁亚伟'],所以有[歌手名:('袁亚伟')]\n”近90天“,所以我们需要column=[数据日期], cell values = [90],所以有[数据日期:(90)]", + "schemaLinks":"[\"结算播放量\", \"歌手名\":(\"'袁亚伟'\"), \"数据日期\":(90)]", + "sql":"select avg(结算播放量) from 艺人库 where 歌手名 = '袁亚伟' and datediff('day', 数据日期, '2023-08-31') <= 90 " + }, + { + "currentDate":"2023-08-31", + "tableName":"艺人库", + "fieldsList":"[\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", + "question":"周倩倩近7天结算播放量总和是多少", + "priorSchemaLinks":"['199509'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“周倩倩近7天结算播放量总和是多少“中,我们被问:\n“结算播放量总和是多少”,所以我们需要column=[结算播放量]\n”周倩倩“,所以我们需要column=[歌手名], cell values = ['周倩倩'],所以有[歌手名:('周倩倩')]\n”近7天“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]", + "schemaLinks":"[\"结算播放量\", \"歌手名\":(\"'周倩倩'\"), \"数据日期\":(7)]", + "sql":"select 
sum(结算播放量) from 艺人库 where 歌手名 = '周倩倩' and datediff('day', 数据日期, '2023-08-31') <= 7 " + }, + { + "currentDate":"2023-09-14", + "tableName":"内容库产品", + "fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", + "question":"内容库访问次数大于1k的部门是哪些", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“内容库访问次数大于1k的部门是哪些“中,我们被问:\n“部门是哪些”,所以我们需要column=[部门]\n”访问次数大于1k的“,所以我们需要column=[访问次数], cell values = [1000],所以有[访问次数:(1000)]", + "schemaLinks":"[\"部门\", \"访问次数\":(1000)]", + "sql":"select 部门 from 内容库产品 where 访问次数 > 1000" + }, + { + "currentDate":"2023-09-18", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"陈亿训唱的所有的播放量大于20k的孤勇者有哪些", + "priorSchemaLinks":"['199509'->MPPM歌手ID, '1527123'->MPPM歌曲ID]", + "analysis":"让我们一步一步地思考。在问题“陈亿训唱的所有的播放量大于20k的孤勇者有哪些“中,我们被问:\n“孤勇者有哪些”,所以我们需要column=[歌曲名], cell values = ['孤勇者'],所以有[歌曲名:('孤勇者')]\n”播放量大于20k的“,所以我们需要column=[结算播放量], cell values = [20000],所以有[结算播放量:(20000)]\n”陈亿训唱的“,所以我们需要column=[歌手名], cell values = ['陈亿训'],所以有[歌手名:('陈亿训')]", + "schemaLinks":"[\"歌曲名\":(\"'孤勇者'\"), \"结算播放量\":(20000), \"歌手名\":(\"'陈亿训'\")]", + "sql":"select 歌曲名 from 歌曲库 where 结算播放量 > 20000 and 歌手名 = '陈亿训' and 歌曲名 = '孤勇者'" + }, + { + "currentDate":"2023-09-18", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"周洁轮去年发布的歌曲有哪些", + "priorSchemaLinks":"['23109'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“周洁轮去年发布的歌曲有哪些“中,我们被问:\n“歌曲有哪些”,所以我们需要column=[歌曲名]\n”去年发布的“,所以我们需要column=[发布时间], cell values = [1],所以有[发布时间:(1)]\n”周洁轮“,所以我们需要column=[歌手名], 
cell values = ['周洁轮'],所以有[歌手名:('周洁轮')]", + "schemaLinks":"[\"歌曲名\", \"发布时间\":(1), \"歌手名\":(\"'周洁轮'\")]", + "sql":"select 歌曲名 from 歌曲库 where datediff('year', 发布时间, '2023-09-18') <= 1 and 歌手名 = '周洁轮'" + }, + { + "currentDate":"2023-09-11", + "tableName":"艺人库", + "fieldsList":"[\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", + "question":"我想要近半年签约的播放量前十的歌手有哪些", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“我想要近半年签约的播放量前十的歌手“中,我们被问:\n“歌手有哪些”,所以我们需要column=[歌手名]\n”播放量前十的“,所以我们需要column=[结算播放量], cell values = [10],所以有[结算播放量:(10)]\n”近半年签约的“,所以我们需要column=[签约日期], cell values = [0.5],所以有[签约日期:(0.5)]", + "schemaLinks":"[\"歌手名\", \"结算播放量\":(10), \"签约日期\":(0.5)]", + "sql":"select 歌手名 from 艺人库 where datediff('year', 签约日期, '2023-09-11') <= 0.5 order by 结算播放量 desc limit 10" + }, + { + "currentDate":"2023-08-12", + "tableName":"歌曲库", + "fieldsList":"[\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", + "question":"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“最近一年发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:\n“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]\n”最近一年发行的“,所以我们需要column=[发行日期], cell values = [1],所以有[发行日期:(1)]\n”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]", + "schemaLinks":"[\"歌曲名\", \"发行日期\":(1), \"数据日期\":(7), \"结算播放量\":(10000000)]", + "sql":"select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 1 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000" + }, + { + "currentDate":"2023-08-12", + "tableName":"歌曲库", + "fieldsList":"[\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", + "question":"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的", + "priorSchemaLinks":"[]", + 
"analysis":"让我们一步一步地思考。在问题“今年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:\n“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]\n”今年以来发行的“,所以我们需要column=[发行日期], cell values = [0],所以有[发行日期:(0)]\n”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]", + "schemaLinks":"[\"歌曲名\", \"发行日期\":(0), \"数据日期\":(7), \"结算播放量\":(10000000)]", + "sql":"select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 0 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000" + }, + { + "currentDate":"2023-08-12", + "tableName":"歌曲库", + "fieldsList":"[\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", + "question":"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的", + "priorSchemaLinks":"['514129144'->MPPM歌曲ID]", + "analysis":"让我们一步一步地思考。在问题“2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:\n“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]\n”2023年以来发行的“,所以我们需要column=[发行日期], cell values = ['2023-01-01'],所以有[发行日期:('2023-01-01')]\n”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]", + "schemaLinks":"[\"歌曲名\", \"发行日期\":(\"'2023-01-01'\"), \"数据日期\":(7), \"结算播放量\":(10000000)]", + "sql":"select 歌曲名 from 歌曲库 where 发行日期 >= '2023-01-01' and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000" + }, + { + "currentDate":"2023-08-01", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"周洁轮2023年6月之后发布的歌曲有哪些", + "priorSchemaLinks":"['23109'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“周洁轮2023年6月之后发布的歌曲有哪些“中,我们被问:\n“歌曲有哪些”,所以我们需要column=[歌曲名]\n”2023年6月之后发布的“,所以我们需要column=[发布时间], cell values = ['2023-06-01'],所以有[发布时间:('2023-06-01')]\n”周洁轮“,所以我们需要column=[歌手名], cell values = ['周洁轮'],所以有[歌手名:('周洁轮')]", + "schemaLinks":"[\"歌曲名\", \"发布时间\":(\"'2023-06-01'\"), 
\"歌手名\":(\"'周洁轮'\")]", + "sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '2023-06-01' and 歌手名 = '周洁轮'" + }, + { + "currentDate":"2023-08-01", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?", + "priorSchemaLinks":"['2312311'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?“中,我们被问:\n“歌曲中,有哪些”,所以我们需要column=[歌曲名]\n“播放量大于500W的”,所以我们需要column=[结算播放量], cell values = [5000000],所以有[结算播放量:(5000000)]\n”邓梓琦在2023年1月5日之后发布的“,所以我们需要column=[发布时间], cell values = ['2023-01-05'],所以有[发布时间:('2023-01-05')]\n”邓梓琦“,所以我们需要column=[歌手名], cell values = ['邓梓琦'],所以有[歌手名:('邓梓琦')]", + "schemaLinks":"[\"歌曲名\", \"结算播放量\":(5000000), \"发布时间\":(\"'2023-01-05'\"), \"歌手名\":(\"'邓梓琦'\")]", + "sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '2023-01-05' and 歌手名 = '邓梓琦' and 结算播放量 > 5000000" + }, + { + "currentDate":"2023-09-17", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"2023年6月以后,张亮英播放量大于200万的歌曲有哪些?", + "priorSchemaLinks":"['45453'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“2023年6月以后,张亮英播放量大于200万的歌曲有哪些?“中,我们被问:\n“播放量大于200万的”,所以我们需要column=[结算播放量], cell values = [2000000],所以有[结算播放量:(2000000)]\n”2023年6月以后,张亮英“,所以我们需要column=[数据日期, 歌手名], cell values = ['2023-06-01', '张亮英'],所以有[数据日期:('2023-06-01'), 歌手名:('张亮英')],\n”歌曲有哪些“,所以我们需要column=[歌曲名]", + "schemaLinks":"[\"结算播放量\":(2000000), \"数据日期\":(\"'2023-06-01'\"), \"歌手名\":(\"'张亮英'\"), \"歌曲名\"]", + "sql":"select 歌曲名 from 歌曲库 where 数据日期 >= '2023-06-01' and 歌手名 = '张亮英' and 结算播放量 > 2000000" + }, + { 
+ "currentDate":"2023-08-16", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些", + "priorSchemaLinks":"['23109'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些“中,我们被问:\n“播放量大于20万的”,所以我们需要column=[结算播放量], cell values = [200000],所以有[结算播放量:(200000)]\n”2021年6月以后发布的“,所以我们需要column=[发布时间], cell values = ['2021-06-01'],所以有[发布时间:('2021-06-01')]\n”李雨纯“,所以我们需要column=[歌手名], cell values = ['李雨纯'],所以有[歌手名:('李雨纯')]", + "schemaLinks":"[\"结算播放量\":(200000), \"发布时间\":(\"'2021-06-01'\"), \"歌手名\":(\"'李雨纯'\")]", + "sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '2021-06-01' and 歌手名 = '李雨纯' and 结算播放量 > 200000" + }, + { + "currentDate":"2023-08-16", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", + "question":"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些", + "priorSchemaLinks":"['4234234'->MPPM歌手ID]", + "analysis":"让我们一步一步地思考。在问题“刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些“中,我们被问:\n“播放量大于20万的”,所以我们需要column=[结算播放量], cell values = [200000],所以有[结算播放量:(200000)]\n”1992年4月2日到2020年5月2日之间发布的“, 所以我们需要column=[发布时间], cell values = ['1992-04-02', '2020-05-02'],所以有[发布时间:('1992-04-02', '2020-05-02')]\n”刘锝桦“,所以我们需要column=[歌手名], cell values = ['刘锝桦'],所以有[歌手名:('刘锝桦')]", + "schemaLinks":"[\"结算播放量\":(200000), \"发布时间\":(\"'1992-04-02'\", \"'2020-05-02'\"), \"歌手名\":(\"'刘锝桦'\")]", + "sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '1992-04-02' and 发布时间 <= '2020-05-02' and 歌手名 = '刘锝桦' and 结算播放量 > 200000" + }, + { + "currentDate":"2023-09-04", + "tableName":"内容库产品", + 
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "question":"内容库近30天访问次数的平均数", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“内容库近30天访问次数的平均数“中,我们被问:\n“访问次数的平均数”,所以我们需要column=[访问次数]\n”内容库近30天“,所以我们需要column=[数据日期], cell values = [30],所以有[数据日期:(30)]", + "schemaLinks":"[\"访问次数\", \"数据日期\":(30)]", + "sql":"select avg(访问次数) from 内容库产品 where datediff('day', 数据日期, '2023-09-04') <= 30 " + }, + { + "currentDate":"2023-09-04", + "tableName":"内容库产品", + "fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "question":"内容库近半年哪个月的访问次数汇总最高", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“内容库近半年哪个月的访问次数汇总最高“中,我们被问:\n“访问次数汇总最高”,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)]\n”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]", + "schemaLinks":"[\"访问次数\":(1), \"数据日期\":(0.5)]", + "sql":"select MONTH(数据日期), sum(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) order by sum(访问次数) desc limit 1" + }, + { + "currentDate":"2023-09-04", + "tableName":"内容库产品", + "fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "question":"内容库近半年每个月的平均访问次数", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“内容库近半年每个月的平均访问次数“中,我们被问:\n“每个月的平均访问次数”,所以我们需要column=[访问次数]\n”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]", + "schemaLinks":"[\"访问次数\", \"数据日期\":(0.5)]", + "sql":"select MONTH(数据日期), avg(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期)" + }, + { + "currentDate":"2023-09-10", + "tableName":"内容库产品", + "fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "question":"内容库 按部门统计访问次数 top10 的部门", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“内容库 按部门统计访问次数 top10 的部门“中,我们被问:\n“访问次数 top10 的部门”,所以我们需要column=[访问次数], cell values = [10],所以有[访问次数:(10)]\n”内容库 按部门统计“,所以我们需要column=[部门]", + "schemaLinks":"[\"访问次数\":(10), \"部门\"]", + "sql":"select 
部门, sum(访问次数) from 内容库产品 group by 部门 order by sum(访问次数) desc limit 10" + }, + { + "currentDate":"2023-09-10", + "tableName":"内容库产品", + "fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", + "question":"超音速 近7个月,月度总访问量超过 2万的月份", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“超音速 近7个月,月度总访问量超过 2万的月份“中,我们被问:\n“月度总访问量超过 2万的月份”,所以我们需要column=[访问次数], cell values = [20000],所以有[访问次数:(20000)]\n”超音速 近7个月“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]", + "schemaLinks":"[\"访问次数\":(20000), \"数据日期\":(7)]", + "sql":"select MONTH(数据日期) from 内容库产品 where datediff('month', 数据日期, '2023-09-10') <= 7 group by MONTH(数据日期) having sum(访问次数) > 20000" + }, + { + "currentDate":"2023-09-10", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", + "question":"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量", + "priorSchemaLinks":"[]", + "analysis":"让我们一步一步地思考。在问题“2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量“中,我们被问:\n“按月粒度来统计近1年的运营播放量”,所以我们需要column=[运营播放量, 数据日期], cell values = [1],所以有[运营播放量, 数据日期:(1)]\n”按播放量取top 100“,所以我们需要column=[播放量], cell values = [100],所以有[播放量:(100)]\n“2022年7月到2023年7月之间发布到歌曲”,所以我们需要column=[发布日期], cell values = ['2022-07-01', '2023-07-01'],所以有[发布日期:('2022-07-01', '2023-07-01')]", + "schemaLinks":"[\"运营播放量\", \"数据日期\":(1), \"播放量\":(100), \"发布日期\":(\"'2022-07-01'\", \"'2023-07-01'\")]", + "sql":"select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期)" + }, + { + "currentDate":"2023-09-10", + "tableName":"歌曲库", + "fieldsList":"[\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", + "question":"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份", + "priorSchemaLinks":"[]", +
"analysis":"让我们一步一步地思考。在问题“2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份“中,我们被问:\n“筛选出其中运营播放量之和大于2k的月份”,所以我们需要column=[运营播放量], cell values = [2000],所以有[运营播放量:(2000)]\n”按月粒度来统计近1年的运营播放量之和“,所以我们需要column=[数据日期], cell values = [1],所以有[数据日期:(1)]\n”按播放量取top100“,所以我们需要column=[播放量], cell values = [100],所以有[播放量:(100)]\n”2022年7月到2023年7月之间发布到歌曲“,所以我们需要column=[发布日期], cell values = ['2022-07-01', '2023-07-01'],所以有[发布日期:('2022-07-01', '2023-07-01')]", + "schemaLinks":"[\"运营播放量\":(2000), \"数据日期\":(1), \"播放量\":(100), \"发布日期\":(\"'2022-07-01'\", \"'2023-07-01'\")]", + "sql":"select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期) having sum(运营播放量) > 2000" + }, + { + "currentDate":"2023-11-01", + "tableName":"营销月模型", + "fieldsList":"[\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]", + "question":"今年智能机在哪个国家的销量之和最高", + "priorSchemaLinks":"['智能机'->机型类别]", + "analysis":"让我们一步一步地思考。在问题“今年智能机在哪个国家的销量之和最高“中,我们被问:\n“销量最高”,所以我们需要column=[销量], cell values = [1],所以有[销量:(1)]\n”今年“,所以我们需要column=[数据日期], cell values = ['2023-01-01', '2023-11-01'],所以有[数据日期:('2023-01-01', '2023-11-01')]\n”智能机“,所以我们需要column=[机型类别], cell values = ['智能机'],所以有[机型类别:('智能机')]", + "schemaLinks":"[\"销量\":(1), \"数据日期\":(\"'2023-01-01'\", \"'2023-11-01'\"), \"机型类别\":(\"'智能机'\")]", + "sql":"select 国家中文名, sum(销量) from 营销月模型 where 机型类别 = '智能机' and 数据日期 >= '2023-01-01' and 数据日期 <= '2023-11-01' group by 国家中文名 order by sum(销量) desc limit 1" + } +] \ No newline at end of file diff --git a/pom.xml b/pom.xml index c1b1cb7ac..42185b82d 100644 --- a/pom.xml +++ b/pom.xml @@ -71,6 +71,7 @@ 22.3.0 2.2.6 3.17 + 0.24.0 @@ -94,6 +95,57 @@ guava ${guava.version} + + + dev.langchain4j + langchain4j-parent + ${langchain4j.version} + + + dev.langchain4j + langchain4j + ${langchain4j.version} + + + dev.langchain4j + langchain4j-core + ${langchain4j.version} + + + 
dev.langchain4j + langchain4j-spring-boot-starter + ${langchain4j.version} + + + dev.langchain4j + langchain4j-open-ai + ${langchain4j.version} + + + dev.langchain4j + langchain4j-hugging-face + ${langchain4j.version} + + + dev.langchain4j + langchain4j-chroma + ${langchain4j.version} + + + dev.langchain4j + langchain4j-embeddings + ${langchain4j.version} + + + dev.langchain4j + langchain4j-hugging-face + ${langchain4j.version} + + + dev.langchain4j + langchain4j-embeddings-all-minilm-l6-v2 + ${langchain4j.version} + diff --git a/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/domain/pojo/Database.java b/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/domain/pojo/Database.java index 2e743a2f0..e7c387336 100644 --- a/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/domain/pojo/Database.java +++ b/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/domain/pojo/Database.java @@ -1,11 +1,10 @@ package com.tencent.supersonic.semantic.model.domain.pojo; +import com.google.common.collect.Lists; import com.tencent.supersonic.common.pojo.RecordInfo; -import lombok.Data; -import org.assertj.core.util.Lists; - import java.util.List; +import lombok.Data; @Data public class Database extends RecordInfo {