mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
(improvement)(headless)Refactor the prompts for generating semantic aliases.
This commit is contained in:
@@ -26,19 +26,19 @@ import java.util.concurrent.ConcurrentHashMap;
|
||||
public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
|
||||
|
||||
private static final String INSTRUCTION = ""
|
||||
+ "#Role: You are a data analyst experienced in SQL languages.\n"
|
||||
+ "#Task: You will be provided a natural language question asked by users,"
|
||||
+ "\n#Role: You are a data analyst experienced in SQL languages."
|
||||
+ "#Task: You will be provided with a natural language question asked by users,"
|
||||
+ "please convert it to a SQL query so that relevant data could be returned "
|
||||
+ "by executing the SQL query against underlying database.\n"
|
||||
+ "#Rules:"
|
||||
+ "by executing the SQL query against underlying database."
|
||||
+ "\n#Rules:"
|
||||
+ "1.ALWAYS generate column specified in the `Schema`, DO NOT hallucinate."
|
||||
+ "2.ALWAYS specify date filter using `>`,`<`,`>=`,`<=` operator."
|
||||
+ "3.ALWAYS calculate the absolute date range by yourself."
|
||||
+ "4.DO NOT include date filter in the where clause if not explicitly expressed in the `Question`."
|
||||
+ "5.DO NOT miss the AGGREGATE operator of metrics, always add it if needed."
|
||||
+ "6.ONLY respond with the converted SQL statement.\n"
|
||||
+ "#Exemplars:\n{{exemplar}}"
|
||||
+ "#Question:{{question}} #Schema:{{schema}} #SideInfo:{{information}} #SQL:";
|
||||
+ "6.ONLY respond with the converted SQL statement."
|
||||
+ "\n#Exemplars:\n{{exemplar}}"
|
||||
+ "Question:{{question}},Schema:{{schema}},SideInfo:{{information}},SQL:";
|
||||
|
||||
@Override
|
||||
public LLMResp generate(LLMReq llmReq) {
|
||||
@@ -83,7 +83,7 @@ public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
|
||||
private Prompt generatePrompt(LLMReq llmReq, LLMResp llmResp) {
|
||||
StringBuilder exemplars = new StringBuilder();
|
||||
for (Text2SQLExemplar exemplar : llmReq.getDynamicExemplars()) {
|
||||
String exemplarStr = String.format("#Question:%s #Schema:%s #SideInfo:%s #SQL:%s\n",
|
||||
String exemplarStr = String.format("Question:%s,Schema:%s,SideInfo:%s,SQL:%s\n",
|
||||
exemplar.getQuestion(), exemplar.getDbSchema(),
|
||||
exemplar.getSideInfo(), exemplar.getSql());
|
||||
exemplars.append(exemplarStr);
|
||||
|
||||
@@ -318,7 +318,7 @@ public class DimensionServiceImpl extends ServiceImpl<DimensionDOMapper, Dimensi
|
||||
@Override
|
||||
public List<String> mockAlias(DimensionReq dimensionReq, String mockType, User user) {
|
||||
String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(),
|
||||
dimensionReq.getBizName(), "", dimensionReq.getDescription(), false);
|
||||
dimensionReq.getBizName(), "", dimensionReq.getDescription());
|
||||
String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias);
|
||||
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
|
||||
});
|
||||
|
||||
@@ -511,7 +511,7 @@ public class MetricServiceImpl extends ServiceImpl<MetricDOMapper, MetricDO>
|
||||
public List<String> mockAlias(MetricBaseReq metricReq, String mockType, User user) {
|
||||
|
||||
String mockAlias = aliasGenerateHelper.generateAlias(mockType, metricReq.getName(), metricReq.getBizName(), "",
|
||||
metricReq.getDescription(), !"".equals(metricReq.getDataFormatType()));
|
||||
metricReq.getDescription());
|
||||
String ret = mockAlias.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", "");
|
||||
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
|
||||
});
|
||||
|
||||
@@ -6,97 +6,89 @@ import com.alibaba.fastjson.JSONException;
|
||||
import dev.langchain4j.data.message.AiMessage;
|
||||
import dev.langchain4j.data.message.SystemMessage;
|
||||
import dev.langchain4j.model.chat.ChatLanguageModel;
|
||||
import dev.langchain4j.model.input.Prompt;
|
||||
import dev.langchain4j.model.input.PromptTemplate;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import dev.langchain4j.provider.ModelProvider;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class AliasGenerateHelper {
|
||||
|
||||
public String getChatCompletion(String message) {
|
||||
SystemMessage from = SystemMessage.from(message);
|
||||
ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel();
|
||||
Response<AiMessage> response = chatLanguageModel.generate(from);
|
||||
log.info("message:{}\n response:{}", message, response);
|
||||
return response.content().text();
|
||||
}
|
||||
private static final Logger keyPipelineLog = LoggerFactory.getLogger("keyPipeline");
|
||||
|
||||
private static final String NAME_ALIAS_INSTRUCTION = ""
|
||||
+ "\n#Role: You are a professional data analyst specializing in metrics and dimensions."
|
||||
+ "\n#Task: You will be provided with metadata about a metric or dimension, please help "
|
||||
+ "generate a few aliases in the same language as its `fieldName`."
|
||||
+ "\n#Rules:"
|
||||
+ "1. Please do not generate aliases like xxx1, xxx2, xxx3."
|
||||
+ "2. Please do not generate aliases that are the same as the original names of metrics/dimensions."
|
||||
+ "3. Please pay attention to the quality of the generated aliases and "
|
||||
+ "avoid creating aliases that look like test data."
|
||||
+ "4. Please output as a json string array."
|
||||
+ "\n#Metadata: {'table':'{{table}}', 'name':'{{name}}', 'type':'{{type}}', "
|
||||
+ "'field':'field', 'description':'{{desc}}'}"
|
||||
+ "\n#Output:";
|
||||
|
||||
private static final String VALUE_ALIAS_INSTRUCTION = ""
|
||||
+ "\n#Role: You are a professional data analyst."
|
||||
+ "\n#Task: You will be provided with a json array of dimension values,"
|
||||
+ "please help generate a few aliases for each value."
|
||||
+ "\n#Rule:"
|
||||
+ "1. ALWAYS output json array for each value."
|
||||
+ "2. The aliases should be in the same language as its original value."
|
||||
+ "\n#Exemplar:"
|
||||
+ "Values: [\\\"qq_music\\\",\\\"kugou_music\\\"], "
|
||||
+ "Output: {\\\"tran\\\":[\\\"qq音乐\\\",\\\"酷狗音乐\\\"],"
|
||||
+ " \\\"alias\\\":{\\\"qq_music\\\":[\\\"q音\\\",\\\"qq音乐\\\"],"
|
||||
+ " \\\"kugou_music\\\":[\\\"kugou\\\",\\\"酷狗\\\"]}}"
|
||||
+ "\nValues: {{values}}, Output:";
|
||||
|
||||
public String generateAlias(String mockType,
|
||||
String name,
|
||||
String bizName,
|
||||
String table,
|
||||
String desc,
|
||||
Boolean isPercentage) {
|
||||
String msg = "Assuming you are a professional data analyst specializing in metrics and dimensions, "
|
||||
+ "you have a vast amount of data analysis metrics content. You are familiar with the basic"
|
||||
+ " format of the content,Now, Construct your answer Based on the following json-schema.\n"
|
||||
+ "{\n"
|
||||
+ "\"$schema\": \"http://json-schema.org/draft-07/schema#\",\n"
|
||||
+ "\"type\": \"array\",\n"
|
||||
+ "\"minItems\": 2,\n"
|
||||
+ "\"maxItems\": 4,\n"
|
||||
+ "\"items\": {\n"
|
||||
+ "\"type\": \"string\",\n"
|
||||
+ "\"description\": \"Assuming you are a data analyst and give a defined "
|
||||
+ mockType
|
||||
+ " name: "
|
||||
+ name + ","
|
||||
+ "this "
|
||||
+ mockType
|
||||
+ " is from database and table: "
|
||||
+ table + ",This "
|
||||
+ mockType
|
||||
+ " calculates the field source: "
|
||||
+ bizName
|
||||
+ ", The description of this metrics is: "
|
||||
+ desc
|
||||
+ ", provide some aliases for this, please take chinese or english,"
|
||||
+ "You must adhere to the following rules:\n"
|
||||
+ "1. Please do not generate aliases like xxx1, xxx2, xxx3.\n"
|
||||
+ "2. Please do not generate aliases that are the same as the original names of metrics/dimensions.\n"
|
||||
+ "3. Please pay attention to the quality of the generated aliases and "
|
||||
+ " avoid creating aliases that look like test data.\n"
|
||||
+ "4. Please generate more Chinese aliases."
|
||||
+ "},\n"
|
||||
+ "\"additionalProperties\":false}\n"
|
||||
+ "Please double-check whether the answer conforms to the format described in the JSON-schema.\n"
|
||||
+ "回答格式示例:"
|
||||
+ "[\n"
|
||||
+ " \"人数\",\n"
|
||||
+ " \"员工人数\",\n"
|
||||
+ " \"员工数量\",\n"
|
||||
+ " \"员工总数\"\n"
|
||||
+ "]\n"
|
||||
+ "请严格按照示例格式进行生成。"
|
||||
+ "ANSWER JSON:";
|
||||
log.info("msg:{}", msg);
|
||||
return getChatCompletion(msg);
|
||||
String desc) {
|
||||
Map<String, Object> variable = new HashMap<>();
|
||||
variable.put("table", table);
|
||||
variable.put("name", name);
|
||||
variable.put("field", bizName);
|
||||
variable.put("type", mockType);
|
||||
variable.put("desc", desc);
|
||||
|
||||
Prompt prompt = PromptTemplate.from(NAME_ALIAS_INSTRUCTION).apply(variable);
|
||||
keyPipelineLog.info("AliasGenerateHelper.generateNameAlias reqPrompt:{}", prompt.text());
|
||||
String response = getChatCompletion(prompt);
|
||||
keyPipelineLog.info("AliasGenerateHelper.generateNameAlias modelResp:{}", response);
|
||||
return response;
|
||||
}
|
||||
|
||||
public String generateDimensionValueAlias(String json) {
|
||||
String msg = "Assuming you are a professional data analyst specializing in indicators,for you a json list,"
|
||||
+ "the required content to follow is as follows: \n"
|
||||
+ "1. The format of JSON,\n"
|
||||
+ "2. Only return in JSON format,\n"
|
||||
+ "3. the array item > 1 and < 5,more alias,\n"
|
||||
+ "for example:\n"
|
||||
+ "input:[\"qq_music\",\"kugou_music\"],\n"
|
||||
+ "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"],"
|
||||
+ "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n"
|
||||
+ "input:[\"qq_music\",\"kugou_music\"],\n"
|
||||
+ "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"],"
|
||||
+ "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n"
|
||||
+ "input:[\"大专\",\"本科\",\"硕士研究生\"],\n"
|
||||
+ "out:{\"tran\":[\"大专\",\"本科\",\"硕士研究生\"],"
|
||||
+ "\"alias\":{\"大专\":[\"专科\",\"大学专科\"],\"本科\":[\"学士\",\"本科生\"],\"硕士研究生\":[\"硕士\",\"研究生\"]}},\n"
|
||||
+ "now input: "
|
||||
+ json + ",\n"
|
||||
+ "answer json:";
|
||||
log.info("msg:{}", msg);
|
||||
return getChatCompletion(msg);
|
||||
Map<String, Object> variable = new HashMap<>();
|
||||
variable.put("values", json);
|
||||
|
||||
Prompt prompt = PromptTemplate.from(VALUE_ALIAS_INSTRUCTION).apply(variable);
|
||||
keyPipelineLog.info("AliasGenerateHelper.generateValueAlias reqPrompt:{}", prompt.text());
|
||||
String response = getChatCompletion(prompt);
|
||||
keyPipelineLog.info("AliasGenerateHelper.generateValueAlias modelResp:{}", response);
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
private String getChatCompletion(Prompt prompt) {
|
||||
SystemMessage from = prompt.toSystemMessage();
|
||||
ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel();
|
||||
Response<AiMessage> response = chatLanguageModel.generate(from);
|
||||
return response.content().text();
|
||||
}
|
||||
|
||||
private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) {
|
||||
|
||||
Reference in New Issue
Block a user