mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-11 03:58:14 +00:00
[improvement](python) LLM parsing related services support Python service and Java service invocation (#418)
This commit is contained in:
@@ -116,6 +116,19 @@
|
||||
<version>${mockito-inline.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!--langchain4j-->
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-open-ai</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-chroma</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
||||
@@ -59,6 +59,19 @@ public class OptimizationConfig {
|
||||
|
||||
@Value("${s2SQL.use.switch:true}")
|
||||
private boolean useS2SqlSwitch;
|
||||
|
||||
@Value("${text2sql.example.num:10}")
|
||||
private int text2sqlExampleNum;
|
||||
|
||||
@Value("${text2sql.fewShots.num:10}")
|
||||
private int text2sqlFewShotsNum;
|
||||
|
||||
@Value("${text2sql.self.consistency.num:5}")
|
||||
private int text2sqlSelfConsistencyNum;
|
||||
|
||||
@Value("${text2sql.collection.name:text2dsl_agent_collection}")
|
||||
private String text2sqlCollectionName;
|
||||
|
||||
@Autowired
|
||||
private SysParameterService sysParameterService;
|
||||
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
package com.tencent.supersonic.chat.llm;
|
||||
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.chat.llm.prompt.FunctionCallPromptGenerator;
|
||||
import com.tencent.supersonic.chat.llm.prompt.OutputFormat;
|
||||
import com.tencent.supersonic.chat.llm.prompt.SqlExampleLoader;
|
||||
import com.tencent.supersonic.chat.llm.prompt.SqlPromptGenerator;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.ElementValue;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import dev.langchain4j.data.message.AiMessage;
|
||||
import dev.langchain4j.model.chat.ChatLanguageModel;
|
||||
import dev.langchain4j.model.input.Prompt;
|
||||
import dev.langchain4j.model.input.PromptTemplate;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class EmbedLLMInterpreter implements LLMInterpreter {
|
||||
|
||||
public LLMResp query2sql(LLMReq llmReq, Long modelId) {
|
||||
|
||||
ChatLanguageModel chatLanguageModel = ContextUtils.getBean(ChatLanguageModel.class);
|
||||
|
||||
SqlExampleLoader sqlExampleLoader = ContextUtils.getBean(SqlExampleLoader.class);
|
||||
|
||||
OptimizationConfig config = ContextUtils.getBean(OptimizationConfig.class);
|
||||
|
||||
List<Map<String, String>> sqlExamples = sqlExampleLoader.retrieverSqlExamples(llmReq.getQueryText(),
|
||||
config.getText2sqlCollectionName(), config.getText2sqlFewShotsNum());
|
||||
|
||||
String queryText = llmReq.getQueryText();
|
||||
String modelName = llmReq.getSchema().getModelName();
|
||||
List<String> fieldNameList = llmReq.getSchema().getFieldNameList();
|
||||
List<ElementValue> linking = llmReq.getLinking();
|
||||
|
||||
SqlPromptGenerator sqlPromptGenerator = ContextUtils.getBean(SqlPromptGenerator.class);
|
||||
String linkingPromptStr = sqlPromptGenerator.generateSchemaLinkingPrompt(queryText, modelName, fieldNameList,
|
||||
linking, sqlExamples);
|
||||
|
||||
Prompt linkingPrompt = PromptTemplate.from(JsonUtil.toString(linkingPromptStr)).apply(new HashMap<>());
|
||||
Response<AiMessage> linkingResult = chatLanguageModel.generate(linkingPrompt.toSystemMessage());
|
||||
|
||||
String schemaLinkStr = OutputFormat.schemaLinkParse(linkingResult.content().text());
|
||||
|
||||
String generateSqlPrompt = sqlPromptGenerator.generateSqlPrompt(queryText, modelName, schemaLinkStr,
|
||||
llmReq.getCurrentDate(), sqlExamples);
|
||||
|
||||
Prompt sqlPrompt = PromptTemplate.from(JsonUtil.toString(generateSqlPrompt)).apply(new HashMap<>());
|
||||
Response<AiMessage> sqlResult = chatLanguageModel.generate(sqlPrompt.toSystemMessage());
|
||||
|
||||
LLMResp result = new LLMResp();
|
||||
result.setQuery(queryText);
|
||||
result.setSchemaLinkingOutput(linkingPromptStr);
|
||||
result.setSchemaLinkStr(schemaLinkStr);
|
||||
result.setModelName(modelName);
|
||||
result.setSqlOutput(sqlResult.content().text());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionResp requestFunction(FunctionReq functionReq) {
|
||||
|
||||
FunctionCallPromptGenerator promptGenerator = ContextUtils.getBean(FunctionCallPromptGenerator.class);
|
||||
|
||||
String functionCallPrompt = promptGenerator.generateFunctionCallPrompt(functionReq.getQueryText(),
|
||||
functionReq.getPluginConfigs());
|
||||
|
||||
ChatLanguageModel chatLanguageModel = ContextUtils.getBean(ChatLanguageModel.class);
|
||||
|
||||
String functionSelect = chatLanguageModel.generate(functionCallPrompt);
|
||||
|
||||
return OutputFormat.functionCallParse(functionSelect);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,71 @@
|
||||
package com.tencent.supersonic.chat.llm;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.tencent.supersonic.chat.config.LLMParserConfig;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionCallConfig;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.util.UriComponentsBuilder;
|
||||
|
||||
@Slf4j
|
||||
public class HttpLLMInterpreter implements LLMInterpreter {
|
||||
|
||||
public LLMResp query2sql(LLMReq llmReq, Long modelId) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
log.info("requestLLM request, modelId:{},llmReq:{}", modelId, llmReq);
|
||||
try {
|
||||
LLMParserConfig llmParserConfig = ContextUtils.getBean(LLMParserConfig.class);
|
||||
|
||||
URL url = new URL(new URL(llmParserConfig.getUrl()), llmParserConfig.getQueryToSqlPath());
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
HttpEntity<String> entity = new HttpEntity<>(JsonUtil.toString(llmReq), headers);
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
ResponseEntity<LLMResp> responseEntity = restTemplate.exchange(url.toString(), HttpMethod.POST, entity,
|
||||
LLMResp.class);
|
||||
|
||||
log.info("requestLLM response,cost:{}, questUrl:{} \n entity:{} \n body:{}",
|
||||
System.currentTimeMillis() - startTime, url, entity, responseEntity.getBody());
|
||||
return responseEntity.getBody();
|
||||
} catch (Exception e) {
|
||||
log.error("requestLLM error", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public FunctionResp requestFunction(FunctionReq functionReq) {
|
||||
FunctionCallConfig functionCallInfoConfig = ContextUtils.getBean(FunctionCallConfig.class);
|
||||
String url = functionCallInfoConfig.getUrl() + functionCallInfoConfig.getPluginSelectPath();
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
long startTime = System.currentTimeMillis();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
HttpEntity<String> entity = new HttpEntity<>(JSON.toJSONString(functionReq), headers);
|
||||
URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri();
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
try {
|
||||
log.info("requestFunction functionReq:{}", JsonUtil.toString(functionReq));
|
||||
ResponseEntity<FunctionResp> responseEntity = restTemplate.exchange(requestUrl, HttpMethod.POST, entity,
|
||||
FunctionResp.class);
|
||||
log.info("requestFunction responseEntity:{},cost:{}", responseEntity,
|
||||
System.currentTimeMillis() - startTime);
|
||||
return responseEntity.getBody();
|
||||
} catch (Exception e) {
|
||||
log.error("requestFunction error", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package com.tencent.supersonic.chat.llm;
|
||||
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
|
||||
/**
 * Unified interpreter for invoking the llm layer.
 *
 * <p>Callers program against this interface so the concrete way of reaching the model
 * can be swapped without touching them.
 */
public interface LLMInterpreter {


    /**
     * Converts the natural-language query described by {@code llmReq} into SQL.
     *
     * @param llmReq  the query text together with the schema information for the model
     * @param modelId identifier of the model the query is asked against
     * @return the interpreter's response
     */
    LLMResp query2sql(LLMReq llmReq, Long modelId);

    /**
     * Selects a plugin (function) for the query described by {@code functionReq}.
     *
     * @param functionReq the query text together with the candidate plugin configurations
     * @return the interpreter's selection
     */
    FunctionResp requestFunction(FunctionReq functionReq);

}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.tencent.supersonic.chat.llm.listener;
|
||||
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.chat.llm.EmbedLLMInterpreter;
|
||||
import com.tencent.supersonic.chat.llm.LLMInterpreter;
|
||||
import com.tencent.supersonic.chat.llm.prompt.SqlExample;
|
||||
import com.tencent.supersonic.chat.llm.prompt.SqlExampleLoader;
|
||||
import com.tencent.supersonic.chat.utils.ComponentFactory;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.CommandLineRunner;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@Order(4)
|
||||
public class EmbeddingInitListener implements CommandLineRunner {
|
||||
|
||||
protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter();
|
||||
@Autowired
|
||||
private SqlExampleLoader sqlExampleLoader;
|
||||
@Autowired
|
||||
private OptimizationConfig optimizationConfig;
|
||||
|
||||
@Override
|
||||
public void run(String... args) {
|
||||
initSqlExamples();
|
||||
}
|
||||
|
||||
public void initSqlExamples() {
|
||||
try {
|
||||
if (llmInterpreter instanceof EmbedLLMInterpreter) {
|
||||
List<SqlExample> sqlExamples = sqlExampleLoader.getSqlExamples();
|
||||
String collectionName = optimizationConfig.getText2sqlCollectionName();
|
||||
sqlExampleLoader.addEmbeddingStore(sqlExamples, collectionName);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("initSqlExamples error", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.tencent.supersonic.chat.llm.prompt;
|
||||
|
||||
import com.tencent.supersonic.chat.plugin.PluginParseConfig;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class FunctionCallPromptGenerator {
|
||||
|
||||
public String generateFunctionCallPrompt(String queryText, List<PluginParseConfig> toolConfigList) {
|
||||
List<String> toolExplainList = toolConfigList.stream()
|
||||
.map(this::constructPluginPrompt)
|
||||
.collect(Collectors.toList());
|
||||
String functionList = String.join(InputFormat.SEPERATOR, toolExplainList);
|
||||
return constructTaskPrompt(queryText, functionList);
|
||||
}
|
||||
|
||||
public String constructPluginPrompt(PluginParseConfig parseConfig) {
|
||||
String toolName = parseConfig.getName();
|
||||
String toolDescription = parseConfig.getDescription();
|
||||
List<String> toolExamples = parseConfig.getExamples();
|
||||
|
||||
StringBuilder prompt = new StringBuilder();
|
||||
prompt.append("【工具名称】\n").append(toolName).append("\n");
|
||||
prompt.append("【工具描述】\n").append(toolDescription).append("\n");
|
||||
prompt.append("【工具适用问题示例】\n");
|
||||
for (String example : toolExamples) {
|
||||
prompt.append(example).append("\n");
|
||||
}
|
||||
return prompt.toString();
|
||||
}
|
||||
|
||||
public String constructTaskPrompt(String queryText, String functionList) {
|
||||
String instruction = String.format("问题为:%s\n请根据问题和工具的描述,选择对应的工具,完成任务。"
|
||||
+ "请注意,只能选择1个工具。请一步一步地分析选择工具的原因(每个工具的【工具适用问题示例】是选择的重要参考依据),"
|
||||
+ "并给出最终选择,输出格式为json,key为’分析过程‘, ’选择工具‘", queryText);
|
||||
|
||||
return String.format("工具选择如下:\n\n%s\n\n【任务说明】\n%s", functionList, instruction);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package com.tencent.supersonic.chat.llm.prompt;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
 * Renders prompt text from templates that contain {@code {key}} placeholders.
 */
public class InputFormat {

    /** Separator placed between consecutive rendered items. */
    public static final String SEPERATOR = "\n\n";

    /**
     * Renders {@code template} once per entry of {@code toFormatList}, using only the keys
     * listed in {@code templateKey}, and joins the results with {@link #SEPERATOR}.
     *
     * @param template     text containing {@code {key}} placeholders
     * @param templateKey  the keys that may be substituted
     * @param toFormatList one key/value map per item to render
     * @return the rendered items joined by {@link #SEPERATOR}; empty string for an empty list
     */
    public static String format(String template, List<String> templateKey,
            List<Map<String, String>> toFormatList) {
        List<String> result = new ArrayList<>();

        for (Map<String, String> formatItem : toFormatList) {
            Map<String, String> retrievalMeta = subDict(formatItem, templateKey);
            result.add(format(template, retrievalMeta));
        }

        return String.join(SEPERATOR, result);
    }

    /**
     * Replaces every {@code {key}} placeholder in {@code input} whose key has a non-null
     * value in {@code replacements}.
     *
     * <p>The text is scanned once from left to right, so substituted values are never
     * re-scanned: a value that happens to contain a key name or another placeholder is
     * emitted verbatim, and the result does not depend on map iteration order.
     * Placeholders with no (or a null) replacement are left untouched.
     *
     * @param input        template text, may be {@code null}
     * @param replacements placeholder name (without braces) to replacement text
     * @return the rendered text, or {@code input} itself when there is nothing to replace
     */
    public static String format(String input, Map<String, String> replacements) {
        if (input == null || replacements == null || replacements.isEmpty()) {
            return input;
        }
        StringBuilder rendered = new StringBuilder(input.length());
        int pos = 0;
        while (pos < input.length()) {
            int open = input.indexOf('{', pos);
            if (open < 0) {
                break;
            }
            int close = input.indexOf('}', open + 1);
            if (close < 0) {
                break;
            }
            String value = replacements.get(input.substring(open + 1, close));
            if (value == null) {
                // Not a known placeholder: keep the brace and resume right after it so a
                // placeholder nested further right is still found.
                rendered.append(input, pos, open + 1);
                pos = open + 1;
            } else {
                rendered.append(input, pos, open).append(value);
                pos = close + 1;
            }
        }
        rendered.append(input, pos, input.length());
        return rendered.toString();
    }

    /** Returns the entries of {@code dict} whose key is listed in {@code keys}. */
    private static Map<String, String> subDict(Map<String, String> dict, List<String> keys) {
        Map<String, String> subDict = new HashMap<>();
        for (String key : keys) {
            if (dict.containsKey(key)) {
                subDict.put(key, dict.get(key));
            }
        }
        return subDict;
    }
}
|
||||
@@ -0,0 +1,54 @@
|
||||
package com.tencent.supersonic.chat.llm.prompt;
|
||||
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/***
|
||||
* output format
|
||||
*/
|
||||
@Slf4j
|
||||
public class OutputFormat {
|
||||
|
||||
public static final String PATTERN = "\\{[^{}]+\\}";
|
||||
|
||||
public static String schemaLinkParse(String schemaLinkOutput) {
|
||||
try {
|
||||
schemaLinkOutput = schemaLinkOutput.trim();
|
||||
String pattern = "Schema_links:(.*)";
|
||||
Pattern regexPattern = Pattern.compile(pattern, Pattern.DOTALL);
|
||||
Matcher matcher = regexPattern.matcher(schemaLinkOutput);
|
||||
if (matcher.find()) {
|
||||
schemaLinkOutput = matcher.group(1).trim();
|
||||
} else {
|
||||
schemaLinkOutput = null;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("", e);
|
||||
schemaLinkOutput = null;
|
||||
}
|
||||
return schemaLinkOutput;
|
||||
}
|
||||
|
||||
|
||||
public static FunctionResp functionCallParse(String llmOutput) {
|
||||
try {
|
||||
String[] findResult = llmOutput.split(PATTERN);
|
||||
String result = findResult[0].trim();
|
||||
|
||||
Map<String, String> resultDict = JsonUtil.toMap(result, String.class, String.class);
|
||||
log.info("result:{},resultDict:{}", result, resultDict);
|
||||
|
||||
String selection = resultDict.get("选择工具");
|
||||
FunctionResp resp = new FunctionResp();
|
||||
resp.setToolSelection(selection);
|
||||
return resp;
|
||||
} catch (Exception e) {
|
||||
log.error("", e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
package com.tencent.supersonic.chat.llm.prompt;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * One text-to-SQL example. Every field is a string and is mapped to the JSON property
 * of the same name; accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class SqlExample {

    // JSON property "currentDate"
    @JsonProperty("currentDate")
    private String currentDate;

    // JSON property "tableName"
    @JsonProperty("tableName")
    private String tableName;

    // JSON property "fieldsList"; kept as a single string, not a list
    @JsonProperty("fieldsList")
    private String fieldsList;

    // JSON property "question"
    @JsonProperty("question")
    private String question;

    // JSON property "priorSchemaLinks"
    @JsonProperty("priorSchemaLinks")
    private String priorSchemaLinks;

    // JSON property "analysis"
    @JsonProperty("analysis")
    private String analysis;

    // JSON property "schemaLinks"
    @JsonProperty("schemaLinks")
    private String schemaLinks;

    // JSON property "sql"
    @JsonProperty("sql")
    private String sql;
}
|
||||
@@ -0,0 +1,50 @@
|
||||
package com.tencent.supersonic.chat.llm.prompt;
|
||||
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.tencent.supersonic.chat.llm.vectordb.EmbeddingStoreOperator;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
public class SqlExampleLoader {
|
||||
|
||||
private static final String EXAMPLE_JSON_FILE = "example.json";
|
||||
@Autowired
|
||||
private EmbeddingStoreOperator embeddingStoreOperator;
|
||||
private TypeReference<List<SqlExample>> valueTypeRef = new TypeReference<List<SqlExample>>() {
|
||||
};
|
||||
|
||||
public List<SqlExample> getSqlExamples() throws IOException {
|
||||
ClassPathResource resource = new ClassPathResource(EXAMPLE_JSON_FILE);
|
||||
InputStream inputStream = resource.getInputStream();
|
||||
return JsonUtil.INSTANCE.getObjectMapper().readValue(inputStream, valueTypeRef);
|
||||
}
|
||||
|
||||
public void addEmbeddingStore(List<SqlExample> sqlExamples, String collectionName) {
|
||||
embeddingStoreOperator.addAll(sqlExamples, collectionName);
|
||||
}
|
||||
|
||||
public List<Map<String, String>> retrieverSqlExamples(String queryText, String collectionName, int maxResults) {
|
||||
List<TextSegment> textSegments = embeddingStoreOperator.retriever(queryText, collectionName, maxResults);
|
||||
|
||||
List<Map<String, String>> result = new ArrayList<>();
|
||||
for (TextSegment textSegment : textSegments) {
|
||||
if (Objects.nonNull(textSegment.metadata())) {
|
||||
result.add(textSegment.metadata().asMap());
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,66 @@
|
||||
package com.tencent.supersonic.chat.llm.prompt;
|
||||
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.ElementValue;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class SqlPromptGenerator {
|
||||
|
||||
public String generateSchemaLinkingPrompt(String question, String modelName, List<String> fieldsList,
|
||||
List<ElementValue> priorSchemaLinks, List<Map<String, String>> exampleList) {
|
||||
|
||||
String exampleTemplate = "Table {tableName}, columns = {fieldsList}, prior_schema_links = {priorSchemaLinks}\n"
|
||||
+ "问题:{question}\n分析:{analysis} 所以Schema_links是:\nSchema_links:{schemaLinks}";
|
||||
|
||||
List<String> exampleKeys = Arrays.asList("tableName", "fieldsList", "priorSchemaLinks", "question", "analysis",
|
||||
"schemaLinks");
|
||||
|
||||
String schemaLinkingPrompt = InputFormat.format(exampleTemplate, exampleKeys, exampleList);
|
||||
|
||||
String newCaseTemplate = "Table {tableName}, columns = {fieldsList}, prior_schema_links = {priorSchemaLinks}\n"
|
||||
+ "问题:{question}\n分析: 让我们一步一步地思考。";
|
||||
|
||||
String newCasePrompt = newCaseTemplate.replace("{tableName}", modelName)
|
||||
.replace("{fieldsList}", fieldsList.toString())
|
||||
.replace("{priorSchemaLinks}", getPriorSchemaLinks(priorSchemaLinks))
|
||||
.replace("{question}", question);
|
||||
|
||||
String instruction = "# 根据数据库的表结构,参考先验信息,找出为每个问题生成SQL查询语句的schema_links";
|
||||
return instruction + InputFormat.SEPERATOR + schemaLinkingPrompt + InputFormat.SEPERATOR + newCasePrompt;
|
||||
}
|
||||
|
||||
private String getPriorSchemaLinks(List<ElementValue> priorSchemaLinks) {
|
||||
return priorSchemaLinks.stream()
|
||||
.map(elementValue -> "'" + elementValue.getFieldName() + "'->" + elementValue.getFieldValue())
|
||||
.collect(Collectors.joining(",", "[", "]"));
|
||||
}
|
||||
|
||||
public String generateSqlPrompt(String question, String modelName, String schemaLinkStr, String dataDate,
|
||||
List<Map<String, String>> exampleList) {
|
||||
|
||||
List<String> exampleKeys = Arrays.asList("question", "currentDate", "tableName", "schemaLinks", "sql");
|
||||
String exampleTemplate = "问题:{question}\nCurrent_date:{currentDate}\nTable {tableName}\n"
|
||||
+ "Schema_links:{schemaLinks}\nSQL:{sql}";
|
||||
|
||||
String sqlExamplePrompt = InputFormat.format(exampleTemplate, exampleKeys, exampleList);
|
||||
|
||||
String newCaseTemplate = "问题:{question}\nCurrent_date:{currentDate}\nTable {tableName}\n"
|
||||
+ "Schema_links:{schemaLinks}\nSQL:";
|
||||
|
||||
String newCasePrompt = newCaseTemplate.replace("{question}", question)
|
||||
.replace("{currentDate}", dataDate)
|
||||
.replace("{tableName}", modelName)
|
||||
.replace("{schemaLinks}", schemaLinkStr);
|
||||
|
||||
String instruction = "# 根据schema_links为每个问题生成SQL查询语句";
|
||||
return instruction + InputFormat.SEPERATOR + sqlExamplePrompt + InputFormat.SEPERATOR + newCasePrompt;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.tencent.supersonic.chat.llm.vectordb;
|
||||
|
||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
 * Hands out one {@link InMemoryEmbeddingStore} per collection name and keeps it for the
 * lifetime of the class, so repeated calls with the same name share the same store.
 */
@Slf4j
public class EmbeddingStoreFactory {

    // Collection name -> store. NOTE(review): raw EmbeddingStore type; the callers in
    // this package store TextSegment values - confirm before adding a type argument.
    private static Map<String, EmbeddingStore> collectionNameToStore = new ConcurrentHashMap<>();


    /**
     * Returns the store registered for {@code collectionName}, creating an in-memory
     * store on first use.
     *
     * @param collectionName name identifying the collection
     * @return the store for that collection
     */
    public static EmbeddingStore create(String collectionName) {
        // computeIfAbsent registers at most one store per name.
        return collectionNameToStore.computeIfAbsent(collectionName, k -> new InMemoryEmbeddingStore());
    }


}
|
||||
@@ -0,0 +1,55 @@
|
||||
package com.tencent.supersonic.chat.llm.vectordb;
|
||||
|
||||
import com.tencent.supersonic.chat.llm.prompt.SqlExample;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import dev.langchain4j.data.document.Metadata;
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.retriever.EmbeddingStoreRetriever;
|
||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class EmbeddingStoreOperator {
|
||||
|
||||
@Autowired
|
||||
private EmbeddingModel embeddingModel;
|
||||
|
||||
public List<TextSegment> retriever(String text, String collectionName, int maxResults) {
|
||||
EmbeddingStore embeddingStore = EmbeddingStoreFactory.create(collectionName);
|
||||
EmbeddingStoreRetriever retriever = EmbeddingStoreRetriever.from(embeddingStore, embeddingModel, maxResults);
|
||||
return retriever.findRelevant(text);
|
||||
}
|
||||
|
||||
public List<String> addAll(List<SqlExample> sqlExamples, String collectionName) {
|
||||
List<Embedding> embeddings = new ArrayList<>();
|
||||
List<TextSegment> textSegments = new ArrayList<>();
|
||||
|
||||
for (SqlExample sqlExample : sqlExamples) {
|
||||
String question = sqlExample.getQuestion();
|
||||
Embedding embedding = embeddingModel.embed(question).content();
|
||||
embeddings.add(embedding);
|
||||
|
||||
Map<String, String> metaDataMap = JsonUtil.toMap(JsonUtil.toString(sqlExample), String.class,
|
||||
String.class);
|
||||
|
||||
TextSegment textSegment = TextSegment.from(question, new Metadata(metaDataMap));
|
||||
textSegments.add(textSegment);
|
||||
}
|
||||
return addAllInternal(embeddings, textSegments, collectionName);
|
||||
}
|
||||
|
||||
private List<String> addAllInternal(List<Embedding> embeddings, List<TextSegment> textSegments,
|
||||
String collectionName) {
|
||||
EmbeddingStore embeddingStore = EmbeddingStoreFactory.create(collectionName);
|
||||
return embeddingStore.addAll(embeddings, textSegments);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -17,7 +17,7 @@ import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.ElementValue;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
import com.tencent.supersonic.chat.service.AgentService;
|
||||
import com.tencent.supersonic.chat.service.LLMParserLayer;
|
||||
import com.tencent.supersonic.chat.llm.LLMInterpreter;
|
||||
import com.tencent.supersonic.chat.utils.ComponentFactory;
|
||||
import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
|
||||
@@ -46,6 +46,8 @@ import org.springframework.util.CollectionUtils;
|
||||
@Service
|
||||
public class LLMRequestService {
|
||||
|
||||
protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter();
|
||||
|
||||
protected SemanticInterpreter semanticInterpreter = ComponentFactory.getSemanticLayer();
|
||||
@Autowired
|
||||
private LLMParserConfig llmParserConfig;
|
||||
@@ -55,8 +57,7 @@ public class LLMRequestService {
|
||||
private SchemaService schemaService;
|
||||
@Autowired
|
||||
private OptimizationConfig optimizationConfig;
|
||||
@Autowired
|
||||
private LLMParserLayer llmParserLayer;
|
||||
|
||||
|
||||
public boolean check(QueryContext queryCtx) {
|
||||
QueryReq request = queryCtx.getRequest();
|
||||
@@ -137,7 +138,7 @@ public class LLMRequestService {
|
||||
}
|
||||
|
||||
public LLMResp requestLLM(LLMReq llmReq, Long modelId) {
|
||||
return llmParserLayer.query2sql(llmReq, modelId);
|
||||
return llmInterpreter.query2sql(llmReq, modelId);
|
||||
}
|
||||
|
||||
protected List<String> getFieldNameList(QueryContext queryCtx, Long modelId, LLMParserConfig llmParserConfig) {
|
||||
|
||||
@@ -2,11 +2,14 @@ package com.tencent.supersonic.chat.parser.plugin.embedding;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.llm.HttpLLMInterpreter;
|
||||
import com.tencent.supersonic.chat.llm.LLMInterpreter;
|
||||
import com.tencent.supersonic.chat.parser.ParseMode;
|
||||
import com.tencent.supersonic.chat.parser.plugin.PluginParser;
|
||||
import com.tencent.supersonic.chat.plugin.Plugin;
|
||||
import com.tencent.supersonic.chat.plugin.PluginManager;
|
||||
import com.tencent.supersonic.chat.plugin.PluginRecallResult;
|
||||
import com.tencent.supersonic.chat.utils.ComponentFactory;
|
||||
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import java.util.Comparator;
|
||||
@@ -22,10 +25,12 @@ import org.springframework.util.CollectionUtils;
|
||||
@Slf4j
|
||||
public class EmbeddingBasedParser extends PluginParser {
|
||||
|
||||
protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter();
|
||||
|
||||
@Override
|
||||
public boolean checkPreCondition(QueryContext queryContext) {
|
||||
EmbeddingConfig embeddingConfig = ContextUtils.getBean(EmbeddingConfig.class);
|
||||
if (StringUtils.isBlank(embeddingConfig.getUrl())) {
|
||||
if (StringUtils.isBlank(embeddingConfig.getUrl()) && llmInterpreter instanceof HttpLLMInterpreter) {
|
||||
return false;
|
||||
}
|
||||
List<Plugin> plugins = getPluginList(queryContext);
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
package com.tencent.supersonic.chat.parser.plugin.function;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.llm.HttpLLMInterpreter;
|
||||
import com.tencent.supersonic.chat.llm.LLMInterpreter;
|
||||
import com.tencent.supersonic.chat.parser.ParseMode;
|
||||
import com.tencent.supersonic.chat.parser.plugin.PluginParser;
|
||||
import com.tencent.supersonic.chat.plugin.Plugin;
|
||||
@@ -10,34 +11,29 @@ import com.tencent.supersonic.chat.plugin.PluginParseConfig;
|
||||
import com.tencent.supersonic.chat.plugin.PluginRecallResult;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.S2SQLQuery;
|
||||
import com.tencent.supersonic.chat.service.PluginService;
|
||||
import com.tencent.supersonic.chat.utils.ComponentFactory;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import java.net.URI;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.util.UriComponentsBuilder;
|
||||
|
||||
@Slf4j
|
||||
public class FunctionBasedParser extends PluginParser {
|
||||
|
||||
protected LLMInterpreter llmInterpreter = ComponentFactory.getLLMInterpreter();
|
||||
|
||||
@Override
|
||||
public boolean checkPreCondition(QueryContext queryContext) {
|
||||
FunctionCallConfig functionCallConfig = ContextUtils.getBean(FunctionCallConfig.class);
|
||||
String functionUrl = functionCallConfig.getUrl();
|
||||
if (StringUtils.isBlank(functionUrl)) {
|
||||
if (StringUtils.isBlank(functionUrl) && llmInterpreter instanceof HttpLLMInterpreter) {
|
||||
log.info("functionUrl:{}, skip function parser, queryText:{}", functionUrl,
|
||||
queryContext.getRequest().getQueryText());
|
||||
return false;
|
||||
@@ -88,7 +84,7 @@ public class FunctionBasedParser extends PluginParser {
|
||||
FunctionReq functionReq = FunctionReq.builder()
|
||||
.queryText(queryContext.getRequest().getQueryText())
|
||||
.pluginConfigs(pluginToFunctionCall).build();
|
||||
functionResp = requestFunction(functionReq);
|
||||
functionResp = llmInterpreter.requestFunction(functionReq);
|
||||
}
|
||||
return functionResp;
|
||||
}
|
||||
@@ -131,25 +127,4 @@ public class FunctionBasedParser extends PluginParser {
|
||||
return functionDOList;
|
||||
}
|
||||
|
||||
public FunctionResp requestFunction(FunctionReq functionReq) {
|
||||
FunctionCallConfig functionCallInfoConfig = ContextUtils.getBean(FunctionCallConfig.class);
|
||||
String url = functionCallInfoConfig.getUrl() + functionCallInfoConfig.getPluginSelectPath();
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
long startTime = System.currentTimeMillis();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
HttpEntity<String> entity = new HttpEntity<>(JSON.toJSONString(functionReq), headers);
|
||||
URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri();
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
try {
|
||||
log.info("requestFunction functionReq:{}", JsonUtil.toString(functionReq));
|
||||
ResponseEntity<FunctionResp> responseEntity = restTemplate.exchange(requestUrl, HttpMethod.POST, entity,
|
||||
FunctionResp.class);
|
||||
log.info("requestFunction responseEntity:{},cost:{}", responseEntity,
|
||||
System.currentTimeMillis() - startTime);
|
||||
return responseEntity.getBody();
|
||||
} catch (Exception e) {
|
||||
log.error("requestFunction error", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
package com.tencent.supersonic.chat.service;
|
||||
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
|
||||
/**
|
||||
* Unified wrapper for invoking the llmparser Python service layer.
|
||||
*/
|
||||
public interface LLMParserLayer {
|
||||
|
||||
LLMResp query2sql(LLMReq llmReq, Long modelId);
|
||||
|
||||
}
|
||||
@@ -1,47 +0,0 @@
|
||||
package com.tencent.supersonic.chat.service.impl;
|
||||
|
||||
import com.tencent.supersonic.chat.config.LLMParserConfig;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
import com.tencent.supersonic.chat.service.LLMParserLayer;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import java.net.URL;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class LLMParserLayerImpl implements LLMParserLayer {
|
||||
|
||||
@Autowired
|
||||
private RestTemplate restTemplate;
|
||||
@Autowired
|
||||
private LLMParserConfig llmParserConfig;
|
||||
|
||||
public LLMResp query2sql(LLMReq llmReq, Long modelId) {
|
||||
long startTime = System.currentTimeMillis();
|
||||
log.info("requestLLM request, modelId:{},llmReq:{}", modelId, llmReq);
|
||||
try {
|
||||
URL url = new URL(new URL(llmParserConfig.getUrl()), llmParserConfig.getQueryToSqlPath());
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
HttpEntity<String> entity = new HttpEntity<>(JsonUtil.toString(llmReq), headers);
|
||||
ResponseEntity<LLMResp> responseEntity = restTemplate.exchange(url.toString(), HttpMethod.POST, entity,
|
||||
LLMResp.class);
|
||||
|
||||
log.info("requestLLM response,cost:{}, questUrl:{} \n entity:{} \n body:{}",
|
||||
System.currentTimeMillis() - startTime, url, entity, responseEntity.getBody());
|
||||
return responseEntity.getBody();
|
||||
} catch (Exception e) {
|
||||
log.error("requestLLM error", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -4,6 +4,7 @@ import com.tencent.supersonic.chat.api.component.SchemaMapper;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticCorrector;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticInterpreter;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticParser;
|
||||
import com.tencent.supersonic.chat.llm.LLMInterpreter;
|
||||
import com.tencent.supersonic.chat.parser.llm.s2sql.ModelResolver;
|
||||
import com.tencent.supersonic.chat.postprocessor.PostProcessor;
|
||||
import com.tencent.supersonic.chat.responder.execute.ExecuteResponder;
|
||||
@@ -20,10 +21,13 @@ public class ComponentFactory {
|
||||
private static List<SemanticParser> semanticParsers = new ArrayList<>();
|
||||
private static List<SemanticCorrector> s2SQLCorrections = new ArrayList<>();
|
||||
private static SemanticInterpreter semanticInterpreter;
|
||||
|
||||
private static LLMInterpreter llmInterpreter;
|
||||
private static List<PostProcessor> postProcessors = new ArrayList<>();
|
||||
private static List<ParseResponder> parseResponders = new ArrayList<>();
|
||||
private static List<ExecuteResponder> executeResponders = new ArrayList<>();
|
||||
private static ModelResolver modelResolver;
|
||||
|
||||
public static List<SchemaMapper> getSchemaMappers() {
|
||||
return CollectionUtils.isEmpty(schemaMappers) ? init(SchemaMapper.class, schemaMappers) : schemaMappers;
|
||||
}
|
||||
@@ -62,6 +66,13 @@ public class ComponentFactory {
|
||||
}
|
||||
|
||||
|
||||
public static LLMInterpreter getLLMInterpreter() {
|
||||
if (Objects.isNull(llmInterpreter)) {
|
||||
llmInterpreter = init(LLMInterpreter.class);
|
||||
}
|
||||
return llmInterpreter;
|
||||
}
|
||||
|
||||
public static ModelResolver getModelResolver() {
|
||||
if (Objects.isNull(modelResolver)) {
|
||||
modelResolver = init(ModelResolver.class);
|
||||
|
||||
@@ -19,6 +19,9 @@ com.tencent.supersonic.chat.api.component.SemanticCorrector=\
|
||||
com.tencent.supersonic.chat.corrector.GroupByCorrector, \
|
||||
com.tencent.supersonic.chat.corrector.HavingCorrector
|
||||
|
||||
com.tencent.supersonic.chat.llm.LLMInterpreter=\
|
||||
com.tencent.supersonic.chat.llm.HttpLLMInterpreter
|
||||
|
||||
com.tencent.supersonic.chat.api.component.SemanticInterpreter=\
|
||||
com.tencent.supersonic.knowledge.semantic.RemoteSemanticInterpreter
|
||||
|
||||
|
||||
@@ -91,7 +91,21 @@
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!--langchain4j-->
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-spring-boot-starter</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<profiles>
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.tencent.supersonic.config;
|
||||
|
||||
import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
public class LangChain4jConfig {
|
||||
|
||||
@Bean
|
||||
EmbeddingModel embeddingModel() {
|
||||
return new AllMiniLmL6V2EmbeddingModel();
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,9 @@ com.tencent.supersonic.chat.api.component.SemanticCorrector=\
|
||||
com.tencent.supersonic.chat.corrector.GroupByCorrector, \
|
||||
com.tencent.supersonic.chat.corrector.HavingCorrector
|
||||
|
||||
com.tencent.supersonic.chat.llm.LLMInterpreter=\
|
||||
com.tencent.supersonic.chat.llm.HttpLLMInterpreter
|
||||
|
||||
com.tencent.supersonic.chat.api.component.SemanticInterpreter=\
|
||||
com.tencent.supersonic.knowledge.semantic.LocalSemanticInterpreter
|
||||
|
||||
@@ -44,4 +47,6 @@ com.tencent.supersonic.chat.responder.parse.ParseResponder=\
|
||||
|
||||
com.tencent.supersonic.chat.responder.execute.ExecuteResponder=\
|
||||
com.tencent.supersonic.chat.responder.execute.EntityInfoExecuteResponder, \
|
||||
com.tencent.supersonic.chat.responder.execute.SimilarMetricExecuteResponder
|
||||
com.tencent.supersonic.chat.responder.execute.SimilarMetricExecuteResponder
|
||||
|
||||
org.springframework.boot.autoconfigure.EnableAutoConfiguration=dev.langchain4j.LangChain4jAutoConfiguration
|
||||
@@ -40,3 +40,18 @@ embedding:
|
||||
url: http://127.0.0.1:9092
|
||||
functionCall:
|
||||
url: http://127.0.0.1:9092
|
||||
|
||||
|
||||
langchain4j:
|
||||
chat-model:
|
||||
provider: open_ai
|
||||
openai:
|
||||
api-key: api_key
|
||||
model-name: gpt-3.5-turbo
|
||||
temperature: 0.0
|
||||
timeout: PT60S
|
||||
|
||||
logging:
|
||||
level:
|
||||
dev.langchain4j: DEBUG
|
||||
dev.ai4j.openai4j: DEBUG
|
||||
312
launchers/standalone/src/main/resources/example.json
Normal file
312
launchers/standalone/src/main/resources/example.json
Normal file
@@ -0,0 +1,312 @@
|
||||
[
|
||||
{
|
||||
"currentDate":"2020-12-01",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]",
|
||||
"question":"比较jackjchen和robinlee在内容库的访问次数",
|
||||
"priorSchemaLinks":"['jackjchen'->用户名, 'robinlee'->用户名]",
|
||||
"analysis":"让我们一步一步地思考。在问题“比较jackjchen和robinlee在内容库的访问次数“中,我们被问:\n“比较jackjchen和robinlee”,所以我们需要column=[用户名],cell values = ['jackjchen', 'robinlee'],所以有[用户名:('jackjchen', 'robinlee')]\n”内容库的访问次数“,所以我们需要column=[访问次数]",
|
||||
"schemaLinks":"[\"用户名\":(\"'jackjchen'\", \"'robinlee'\"), \"访问次数\"]",
|
||||
"sql":"select 用户名, 访问次数 from 内容库产品 where 用户名 in ('jackjchen', 'robinlee')"
|
||||
},
|
||||
{
|
||||
"currentDate":"2022-11-06",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]",
|
||||
"question":"内容库近12个月访问人数 按部门",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库近12个月访问人数 按部门“中,我们被问:\n”内容库近12个月“,所以我们需要column=[数据日期],cell values = [12],所以有[数据日期:(12)]\n“访问人数”,所以我们需要column=[访问人数]\n”按部门“,所以我们需要column=[部门]",
|
||||
"schemaLinks":"[\"数据日期\":(12), \"访问人数\", \"部门\"]",
|
||||
"sql":"select 部门, 数据日期, 访问人数 from 内容库产品 where datediff('month', 数据日期, '2022-11-06') <= 12 "
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-04-21",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]",
|
||||
"question":"内容库美术部、技术研发部的访问时长",
|
||||
"priorSchemaLinks":"['美术部'->部门, '技术研发部'->部门]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库美术部、技术研发部的访问时长“中,我们被问:\n“访问时长”,所以我们需要column=[访问时长]\n”内容库美术部、技术研发部“,所以我们需要column=[部门], cell values = ['美术部', '技术研发部'],所以有[部门:('美术部', '技术研发部')]",
|
||||
"schemaLinks":"[\"访问时长\", \"部门\":(\"'美术部'\", \"'技术研发部'\")]",
|
||||
"sql":"select 部门, 访问时长 from 内容库产品 where 部门 in ('美术部', '技术研发部')"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-21",
|
||||
"tableName":"严选",
|
||||
"fieldsList":"[\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]",
|
||||
"question":"近3天海田飞系MPPM结算播放份额",
|
||||
"priorSchemaLinks":"['海田飞系'->严选版权归属系]",
|
||||
"analysis":"让我们一步一步地思考。在问题“近3天海田飞系MPPM结算播放份额“中,我们被问:\n“MPPM结算播放份额”,所以我们需要column=[结算播放份额], \n”海田飞系“,所以我们需要column=[严选版权归属系], cell values = ['海田飞系'],所以有[严选版权归属系:('海田飞系')],\n”近3天“,所以我们需要column=[数据日期], cell values = [3],所以有[数据日期:(3)]",
|
||||
"schemaLinks":"[\"结算播放份额\", \"严选版权归属系\":(\"'海田飞系'\"), \"数据日期\":(3)]",
|
||||
"sql":"select 严选版权归属系, 结算播放份额 from 严选 where 严选版权归属系 = '海田飞系' and datediff('day', 数据日期, '2023-08-21') <= 3 "
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-05-22",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]",
|
||||
"question":"对比近7天翻唱版和纯音乐的歌曲播放量",
|
||||
"priorSchemaLinks":"['纯音乐'->语种, '翻唱版'->歌曲版本]",
|
||||
"analysis":"让我们一步一步地思考。在问题“对比近7天翻唱版和纯音乐的歌曲播放量“中,我们被问:\n“歌曲播放量”,所以我们需要column=[结算播放量]\n”翻唱版“,所以我们需要column=[歌曲版本], cell values = ['翻唱版'],所以有[歌曲版本:('翻唱版')]\n”和纯音乐的歌曲“,所以我们需要column=[语种], cell values = ['纯音乐'],所以有[语种:('纯音乐')]\n”近7天“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]",
|
||||
"schemaLinks":"[\"结算播放量\", \"歌曲版本\":(\"'翻唱版'\"), \"语种\":(\"'纯音乐'\"), \"数据日期\":(7)]",
|
||||
"sql":"select 歌曲版本, 语种, 结算播放量 from 歌曲库 where 歌曲版本 = '翻唱版' and 语种 = '纯音乐' and datediff('day', 数据日期, '2023-05-22') <= 7 "
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-05-31",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"[\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]",
|
||||
"question":"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数",
|
||||
"priorSchemaLinks":"['1527896'->MPPM歌手ID, '1565463'->MPPM歌手ID, '2141459'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数“中,我们被问:\n“粉丝数”,所以我们需要column=[粉丝数]\n”陈拙悬、孟梅琦、赖媚韵“,所以我们需要column=[歌手名], cell values = ['陈拙悬', '孟梅琦', '赖媚韵'],所以有[歌手名:('陈拙悬', '孟梅琦', '赖媚韵')]",
|
||||
"schemaLinks":"[\"粉丝数\", \"歌手名\":(\"'陈拙悬'\", \"'孟梅琦'\", \"'赖媚韵'\")]",
|
||||
"sql":"select 歌手名, 粉丝数 from 艺人库 where 歌手名 in ('陈拙悬', '孟梅琦', '赖媚韵')"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-07-31",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"播放量大于1万的歌曲有多少",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“播放量大于1万的歌曲有多少“中,我们被问:\n“歌曲有多少”,所以我们需要column=[歌曲名]\n”播放量大于1万的“,所以我们需要column=[结算播放量], cell values = [10000],所以有[结算播放量:(10000)]",
|
||||
"schemaLinks":"[\"歌曲名\", \"结算播放量\":(10000)]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 结算播放量 > 10000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-07-31",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]",
|
||||
"question":"内容库访问时长小于1小时,且来自美术部的用户是哪些",
|
||||
"priorSchemaLinks":"['美术部'->部门]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库访问时长小于1小时,且来自美术部的用户是哪些“中,我们被问:\n“用户是哪些”,所以我们需要column=[用户名]\n”美术部的“,所以我们需要column=[部门], cell values = ['美术部'],所以有[部门:('美术部')]\n”访问时长小于1小时“,所以我们需要column=[访问时长], cell values = [1],所以有[访问时长:(1)]",
|
||||
"schemaLinks":"[\"用户名\", \"部门\":(\"'美术部'\"), \"访问时长\":(1)]",
|
||||
"sql":"select 用户名 from 内容库产品 where 部门 = '美术部' and 访问时长 < 1"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-31",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]",
|
||||
"question":"内容库pv最高的用户有哪些",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库pv最高的用户有哪些“中,我们被问:\n“用户有哪些”,所以我们需要column=[用户名]\n”pv最高的“,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)]",
|
||||
"schemaLinks":"[\"用户名\", \"访问次数\":(1)]",
|
||||
"sql":"select 用户名 from 内容库产品 order by 访问次数 desc limit 1"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-31",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"[\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]",
|
||||
"question":"近90天袁亚伟播放量平均值是多少",
|
||||
"priorSchemaLinks":"['152789226'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“近90天袁亚伟播放量平均值是多少“中,我们被问:\n“播放量平均值是多少”,所以我们需要column=[结算播放量]\n”袁亚伟“,所以我们需要column=[歌手名], cell values = ['袁亚伟'],所以有[歌手名:('袁亚伟')]\n”近90天“,所以我们需要column=[数据日期], cell values = [90],所以有[数据日期:(90)]",
|
||||
"schemaLinks":"[\"结算播放量\", \"歌手名\":(\"'袁亚伟'\"), \"数据日期\":(90)]",
|
||||
"sql":"select avg(结算播放量) from 艺人库 where 歌手名 = '袁亚伟' and datediff('day', 数据日期, '2023-08-31') <= 90 "
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-31",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"[\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]",
|
||||
"question":"周倩倩近7天结算播放量总和是多少",
|
||||
"priorSchemaLinks":"['199509'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“周倩倩近7天结算播放量总和是多少“中,我们被问:\n“结算播放量总和是多少”,所以我们需要column=[结算播放量]\n”周倩倩“,所以我们需要column=[歌手名], cell values = ['周倩倩'],所以有[歌手名:('周倩倩')]\n”近7天“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]",
|
||||
"schemaLinks":"[\"结算播放量\", \"歌手名\":(\"'周倩倩'\"), \"数据日期\":(7)]",
|
||||
"sql":"select sum(结算播放量) from 艺人库 where 歌手名 = '周倩倩' and datediff('day', 数据日期, '2023-08-31') <= 7 "
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-14",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]",
|
||||
"question":"内容库访问次数大于1k的部门是哪些",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库访问次数大于1k的部门是哪些“中,我们被问:\n“部门是哪些”,所以我们需要column=[部门]\n”访问次数大于1k的“,所以我们需要column=[访问次数], cell values = [1000],所以有[访问次数:(1000)]",
|
||||
"schemaLinks":"[\"部门\", \"访问次数\":(1000)]",
|
||||
"sql":"select 部门 from 内容库产品 where 访问次数 > 1000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-18",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"陈亿训唱的所有的播放量大于20k的孤勇者有哪些",
|
||||
"priorSchemaLinks":"['199509'->MPPM歌手ID, '1527123'->MPPM歌曲ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“陈亿训唱的所有的播放量大于20k的孤勇者有哪些“中,我们被问:\n“孤勇者有哪些”,所以我们需要column=[歌曲名], cell values = ['孤勇者'],所以有[歌曲名:('孤勇者')]\n”播放量大于20k的“,所以我们需要column=[结算播放量], cell values = [20000],所以有[结算播放量:(20000)]\n”陈亿训唱的“,所以我们需要column=[歌手名], cell values = ['陈亿训'],所以有[歌手名:('陈亿训')]",
|
||||
"schemaLinks":"[\"歌曲名\":(\"'孤勇者'\"), \"结算播放量\":(20000), \"歌手名\":(\"'陈亿训'\")]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 结算播放量 > 20000 and 歌手名 = '陈亿训' and 歌曲名 = '孤勇者'"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-18",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"周洁轮去年发布的歌曲有哪些",
|
||||
"priorSchemaLinks":"['23109'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“周洁轮去年发布的歌曲有哪些“中,我们被问:\n“歌曲有哪些”,所以我们需要column=[歌曲名]\n”去年发布的“,所以我们需要column=[发布时间], cell values = [1],所以有[发布时间:(1)]\n”周洁轮“,所以我们需要column=[歌手名], cell values = ['周洁轮'],所以有[歌手名:('周洁轮')]",
|
||||
"schemaLinks":"[\"歌曲名\", \"发布时间\":(1), \"歌手名\":(\"'周洁轮'\")]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where datediff('year', 发布时间, '2023-09-18') <= 1 and 歌手名 = '周洁轮'"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-11",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"[\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]",
|
||||
"question":"我想要近半年签约的播放量前十的歌手有哪些",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“我想要近半年签约的播放量前十的歌手有哪些“中,我们被问:\n“歌手有哪些”,所以我们需要column=[歌手名]\n”播放量前十的“,所以我们需要column=[结算播放量], cell values = [10],所以有[结算播放量:(10)]\n”近半年签约的“,所以我们需要column=[签约日期], cell values = [0.5],所以有[签约日期:(0.5)]",
|
||||
"schemaLinks":"[\"歌手名\", \"结算播放量\":(10), \"签约日期\":(0.5)]",
|
||||
"sql":"select 歌手名 from 艺人库 where datediff('year', 签约日期, '2023-09-11') <= 0.5 order by 结算播放量 desc limit 10"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-12",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]",
|
||||
"question":"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“最近一年发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:\n“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]\n”最近一年发行的“,所以我们需要column=[发行日期], cell values = [1],所以有[发行日期:(1)]\n”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]",
|
||||
"schemaLinks":"[\"歌曲名\", \"发行日期\":(1), \"数据日期\":(7), \"结算播放量\":(10000000)]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 1 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-12",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]",
|
||||
"question":"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“今年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:\n“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]\n”今年以来发行的“,所以我们需要column=[发行日期], cell values = [0],所以有[发行日期:(0)]\n”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]",
|
||||
"schemaLinks":"[\"歌曲名\", \"发行日期\":(0), \"数据日期\":(7), \"结算播放量\":(10000000)]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 0 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-12",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]",
|
||||
"question":"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"priorSchemaLinks":"['514129144'->MPPM歌曲ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:\n“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]\n”2023年以来发行的“,所以我们需要column=[发行日期], cell values = ['2023-01-01'],所以有[发行日期:('2023-01-01')]\n”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]",
|
||||
"schemaLinks":"[\"歌曲名\", \"发行日期\":(\"'2023-01-01'\"), \"数据日期\":(7), \"结算播放量\":(10000000)]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 发行日期 >= '2023-01-01' and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-01",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"周洁轮2023年6月之后发布的歌曲有哪些",
|
||||
"priorSchemaLinks":"['23109'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“周洁轮2023年6月之后发布的歌曲有哪些“中,我们被问:\n“歌曲有哪些”,所以我们需要column=[歌曲名]\n”2023年6月之后发布的“,所以我们需要column=[发布时间], cell values = ['2023-06-01'],所以有[发布时间:('2023-06-01')]\n”周洁轮“,所以我们需要column=[歌手名], cell values = ['周洁轮'],所以有[歌手名:('周洁轮')]",
|
||||
"schemaLinks":"[\"歌曲名\", \"发布时间\":(\"'2023-06-01'\"), \"歌手名\":(\"'周洁轮'\")]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '2023-06-01' and 歌手名 = '周洁轮'"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-01",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?",
|
||||
"priorSchemaLinks":"['2312311'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?“中,我们被问:\n“歌曲中,有哪些”,所以我们需要column=[歌曲名]\n“播放量大于500W的”,所以我们需要column=[结算播放量], cell values = [5000000],所以有[结算播放量:(5000000)]\n”邓梓琦在2023年1月5日之后发布的“,所以我们需要column=[发布时间], cell values = ['2023-01-05'],所以有[发布时间:('2023-01-05')]\n”邓梓琦“,所以我们需要column=[歌手名], cell values = ['邓梓琦'],所以有[歌手名:('邓梓琦')]",
|
||||
"schemaLinks":"[\"歌曲名\", \"结算播放量\":(5000000), \"发布时间\":(\"'2023-01-05'\"), \"歌手名\":(\"'邓梓琦'\")]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '2023-01-05' and 歌手名 = '邓梓琦' and 结算播放量 > 5000000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-17",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"2023年6月以后,张亮英播放量大于200万的歌曲有哪些?",
|
||||
"priorSchemaLinks":"['45453'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“2023年6月以后,张亮英播放量大于200万的歌曲有哪些?“中,我们被问:\n“播放量大于200万的”,所以我们需要column=[结算播放量], cell values = [2000000],所以有[结算播放量:(2000000)]\n”2023年6月以后,张亮英“,所以我们需要column=[数据日期, 歌手名], cell values = ['2023-06-01', '张亮英'],所以有[数据日期:('2023-06-01'), 歌手名:('张亮英')],\n”歌曲有哪些“,所以我们需要column=[歌曲名]",
|
||||
"schemaLinks":"[\"结算播放量\":(2000000), \"数据日期\":(\"'2023-06-01'\"), \"歌手名\":(\"'张亮英'\"), \"歌曲名\"]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 数据日期 >= '2023-06-01' and 歌手名 = '张亮英' and 结算播放量 > 2000000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-16",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些",
|
||||
"priorSchemaLinks":"['23109'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些“中,我们被问:\n“播放量大于20万的”,所以我们需要column=[结算播放量], cell values = [200000],所以有[结算播放量:(200000)]\n”2021年6月以后发布的“,所以我们需要column=[发布时间], cell values = ['2021-06-01'],所以有[发布时间:('2021-06-01')]\n”李雨纯“,所以我们需要column=[歌手名], cell values = ['李雨纯'],所以有[歌手名:('李雨纯')]",
|
||||
"schemaLinks":"[\"结算播放量\":(200000), \"发布时间\":(\"'2021-06-01'\"), \"歌手名\":(\"'李雨纯'\")]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '2021-06-01' and 歌手名 = '李雨纯' and 结算播放量 > 200000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-08-16",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]",
|
||||
"question":"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些",
|
||||
"priorSchemaLinks":"['4234234'->MPPM歌手ID]",
|
||||
"analysis":"让我们一步一步地思考。在问题“刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些“中,我们被问:\n“播放量大于20万的”,所以我们需要column=[结算播放量], cell values = [200000],所以有[结算播放量:(200000)]\n”1992年4月2日到2020年5月2日之间发布的“, 所以我们需要column=[发布时间], cell values = ['1992-04-02', '2020-05-02'],所以有[发布时间:('1992-04-02', '2020-05-02')]\n”刘锝桦“,所以我们需要column=[歌手名], cell values = ['刘锝桦'],所以有[歌手名:('刘锝桦')]",
|
||||
"schemaLinks":"[\"结算播放量\":(200000), \"发布时间\":(\"'1992-04-02'\", \"'2020-05-02'\"), \"歌手名\":(\"'刘锝桦'\")]",
|
||||
"sql":"select 歌曲名 from 歌曲库 where 发布时间 >= '1992-04-02' and 发布时间 <= '2020-05-02' and 歌手名 = '刘锝桦' and 结算播放量 > 200000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-04",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]",
|
||||
"question":"内容库近30天访问次数的平均数",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库近30天访问次数的平均数“中,我们被问:\n“访问次数的平均数”,所以我们需要column=[访问次数]\n”内容库近30天“,所以我们需要column=[数据日期], cell values = [30],所以有[数据日期:(30)]",
|
||||
"schemaLinks":"[\"访问次数\", \"数据日期\":(30)]",
|
||||
"sql":"select avg(访问次数) from 内容库产品 where datediff('day', 数据日期, '2023-09-04') <= 30 "
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-04",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]",
|
||||
"question":"内容库近半年哪个月的访问次数汇总最高",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库近半年哪个月的访问次数汇总最高“中,我们被问:\n“访问次数汇总最高”,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)]\n”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]",
|
||||
"schemaLinks":"[\"访问次数\":(1), \"数据日期\":(0.5)]",
|
||||
"sql":"select MONTH(数据日期), sum(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) order by sum(访问次数) desc limit 1"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-04",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]",
|
||||
"question":"内容库近半年每个月的平均访问次数",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库近半年每个月的平均访问次数“中,我们被问:\n“每个月的平均访问次数”,所以我们需要column=[访问次数]\n”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]",
|
||||
"schemaLinks":"[\"访问次数\", \"数据日期\":(0.5)]",
|
||||
"sql":"select MONTH(数据日期), avg(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期)"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]",
|
||||
"question":"内容库 按部门统计访问次数 top10 的部门",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“内容库 按部门统计访问次数 top10 的部门“中,我们被问:\n“访问次数 top10 的部门”,所以我们需要column=[访问次数], cell values = [10],所以有[访问次数:(10)]\n”内容库 按部门统计“,所以我们需要column=[部门]",
|
||||
"schemaLinks":"[\"访问次数\":(10), \"部门\"]",
|
||||
"sql":"select 部门, sum(访问次数) from 内容库产品 group by 部门 order by sum(访问次数) desc limit 10"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"[\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]",
|
||||
"question":"超音速 近7个月,月度总访问量超过 2万的月份",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“超音速 近7个月,月度总访问量超过 2万的月份“中,我们被问:\n“月度总访问量超过 2万的月份”,所以我们需要column=[访问次数], cell values = [20000],所以有[访问次数:(20000)]\n”超音速 近7个月“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]",
|
||||
"schemaLinks":"[\"访问次数\":(20000), \"数据日期\":(7)]",
|
||||
"sql":"select MONTH(数据日期) from 内容库产品 where datediff('month', 数据日期, '2023-09-10') <= 7 group by MONTH(数据日期) having sum(访问次数) > 20000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]",
|
||||
"question":"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量“中,我们被问:\n“按月粒度来统计近1年的运营播放量”,所以我们需要column=[运营播放量, 数据日期], cell values = [1],所以有[运营播放量, 数据日期:(1)]\n”按播放量取top 100“,所以我们需要column=[播放量], cell values = [100],所以有[播放量:(100)]\n“2022年7月到2023年7月之间发布到歌曲”,所以我们需要column=[发布日期], cell values = ['2022-07-01', '2023-07-01'],所以有[发布日期:('2022-07-01', '2023-07-01')]",
|
||||
"schemaLinks":"[\"运营播放量\", \"数据日期\":(1), \"播放量\":(100), \"发布日期\":(\"'2022-07-01'\", \"'2023-07-01'\")]",
|
||||
"sql":"select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期)"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"[\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]",
|
||||
"question":"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份",
|
||||
"priorSchemaLinks":"[]",
|
||||
"analysis":"让我们一步一步地思考。在问题“2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份“中,我们被问:\n“筛选出其中运营播放量之和大于2k的月份”,所以我们需要column=[运营播放量], cell values = [2000],所以有[运营播放量:(2000)]\n”按月粒度来统计近1年的运营播放量之和“,所以我们需要column=[数据日期], cell values = [1],所以有[数据日期:(1)]\n”按播放量取top100“,所以我们需要column=[播放量], cell values = [100],所以有[播放量:(100)]\n”2022年7月到2023年7月之间发布到歌曲“,所以我们需要column=[发布日期], cell values = ['2022-07-01', '2023-07-01'],所以有[发布日期:('2022-07-01', '2023-07-01')]",
|
||||
"schemaLinks":"[\"运营播放量\":(2000), \"数据日期\":(1), \"播放量\":(100), \"发布日期\":(\"'2022-07-01'\", \"'2023-07-01'\")]",
|
||||
"sql":"select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期) having sum(运营播放量) > 2000"
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-11-01",
|
||||
"tableName":"营销月模型",
|
||||
"fieldsList":"[\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]",
|
||||
"question":"今年智能机在哪个国家的销量之和最高",
|
||||
"priorSchemaLinks":"['智能机'->机型类别]",
|
||||
"analysis":"让我们一步一步地思考。在问题“今年智能机在哪个国家的销量之和最高“中,我们被问:\n“销量之和最高”,所以我们需要column=[销量], cell values = [1],所以有[销量:(1)]\n”今年“,所以我们需要column=[数据日期], cell values = ['2023-01-01', '2023-11-01'],所以有[数据日期:('2023-01-01', '2023-11-01')]\n”智能机“,所以我们需要column=[机型类别], cell values = ['智能机'],所以有[机型类别:('智能机')]",
|
||||
"schemaLinks":"[\"销量\":(1), \"数据日期\":(\"'2023-01-01'\", \"'2023-11-01'\"), \"机型类别\":(\"'智能机'\")]",
|
||||
"sql":"select 国家中文名, sum(销量) from 营销月模型 where 机型类别 = '智能机' and 数据日期 >= '2023-01-01' and 数据日期 <= '2023-11-01' group by 国家中文名 order by sum(销量) desc limit 1"
|
||||
}
|
||||
]
|
||||
52
pom.xml
52
pom.xml
@@ -71,6 +71,7 @@
|
||||
<spotless.python.black.version>22.3.0</spotless.python.black.version>
|
||||
<easyexcel.version>2.2.6</easyexcel.version>
|
||||
<poi.version>3.17</poi.version>
|
||||
<langchain4j.version>0.24.0</langchain4j.version>
|
||||
</properties>
|
||||
|
||||
<dependencyManagement>
|
||||
@@ -94,6 +95,57 @@
|
||||
<artifactId>guava</artifactId>
|
||||
<version>${guava.version}</version>
|
||||
</dependency>
|
||||
<!--langchain4j-->
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-parent</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-core</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-spring-boot-starter</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-open-ai</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-hugging-face</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-chroma</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-embeddings</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-hugging-face</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
|
||||
<version>${langchain4j.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
package com.tencent.supersonic.semantic.model.domain.pojo;
|
||||
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.pojo.RecordInfo;
|
||||
import lombok.Data;
|
||||
import org.assertj.core.util.Lists;
|
||||
|
||||
import java.util.List;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class Database extends RecordInfo {
|
||||
|
||||
Reference in New Issue
Block a user