mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
(improvement)(headless)Refactor the prompts for generating semantic aliases.
This commit is contained in:
@@ -26,19 +26,19 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||||||
public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
|
public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
|
||||||
|
|
||||||
private static final String INSTRUCTION = ""
|
private static final String INSTRUCTION = ""
|
||||||
+ "#Role: You are a data analyst experienced in SQL languages.\n"
|
+ "\n#Role: You are a data analyst experienced in SQL languages."
|
||||||
+ "#Task: You will be provided a natural language question asked by users,"
|
+ "#Task: You will be provided with a natural language question asked by users,"
|
||||||
+ "please convert it to a SQL query so that relevant data could be returned "
|
+ "please convert it to a SQL query so that relevant data could be returned "
|
||||||
+ "by executing the SQL query against underlying database.\n"
|
+ "by executing the SQL query against underlying database."
|
||||||
+ "#Rules:"
|
+ "\n#Rules:"
|
||||||
+ "1.ALWAYS generate column specified in the `Schema`, DO NOT hallucinate."
|
+ "1.ALWAYS generate column specified in the `Schema`, DO NOT hallucinate."
|
||||||
+ "2.ALWAYS specify date filter using `>`,`<`,`>=`,`<=` operator."
|
+ "2.ALWAYS specify date filter using `>`,`<`,`>=`,`<=` operator."
|
||||||
+ "3.ALWAYS calculate the absolute date range by yourself."
|
+ "3.ALWAYS calculate the absolute date range by yourself."
|
||||||
+ "4.DO NOT include date filter in the where clause if not explicitly expressed in the `Question`."
|
+ "4.DO NOT include date filter in the where clause if not explicitly expressed in the `Question`."
|
||||||
+ "5.DO NOT miss the AGGREGATE operator of metrics, always add it if needed."
|
+ "5.DO NOT miss the AGGREGATE operator of metrics, always add it if needed."
|
||||||
+ "6.ONLY respond with the converted SQL statement.\n"
|
+ "6.ONLY respond with the converted SQL statement."
|
||||||
+ "#Exemplars:\n{{exemplar}}"
|
+ "\n#Exemplars:\n{{exemplar}}"
|
||||||
+ "#Question:{{question}} #Schema:{{schema}} #SideInfo:{{information}} #SQL:";
|
+ "Question:{{question}},Schema:{{schema}},SideInfo:{{information}},SQL:";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public LLMResp generate(LLMReq llmReq) {
|
public LLMResp generate(LLMReq llmReq) {
|
||||||
@@ -83,7 +83,7 @@ public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
|
|||||||
private Prompt generatePrompt(LLMReq llmReq, LLMResp llmResp) {
|
private Prompt generatePrompt(LLMReq llmReq, LLMResp llmResp) {
|
||||||
StringBuilder exemplars = new StringBuilder();
|
StringBuilder exemplars = new StringBuilder();
|
||||||
for (Text2SQLExemplar exemplar : llmReq.getDynamicExemplars()) {
|
for (Text2SQLExemplar exemplar : llmReq.getDynamicExemplars()) {
|
||||||
String exemplarStr = String.format("#Question:%s #Schema:%s #SideInfo:%s #SQL:%s\n",
|
String exemplarStr = String.format("Question:%s,Schema:%s,SideInfo:%s,SQL:%s\n",
|
||||||
exemplar.getQuestion(), exemplar.getDbSchema(),
|
exemplar.getQuestion(), exemplar.getDbSchema(),
|
||||||
exemplar.getSideInfo(), exemplar.getSql());
|
exemplar.getSideInfo(), exemplar.getSql());
|
||||||
exemplars.append(exemplarStr);
|
exemplars.append(exemplarStr);
|
||||||
|
|||||||
@@ -318,7 +318,7 @@ public class DimensionServiceImpl extends ServiceImpl<DimensionDOMapper, Dimensi
|
|||||||
@Override
|
@Override
|
||||||
public List<String> mockAlias(DimensionReq dimensionReq, String mockType, User user) {
|
public List<String> mockAlias(DimensionReq dimensionReq, String mockType, User user) {
|
||||||
String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(),
|
String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(),
|
||||||
dimensionReq.getBizName(), "", dimensionReq.getDescription(), false);
|
dimensionReq.getBizName(), "", dimensionReq.getDescription());
|
||||||
String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias);
|
String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias);
|
||||||
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
|
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -511,7 +511,7 @@ public class MetricServiceImpl extends ServiceImpl<MetricDOMapper, MetricDO>
|
|||||||
public List<String> mockAlias(MetricBaseReq metricReq, String mockType, User user) {
|
public List<String> mockAlias(MetricBaseReq metricReq, String mockType, User user) {
|
||||||
|
|
||||||
String mockAlias = aliasGenerateHelper.generateAlias(mockType, metricReq.getName(), metricReq.getBizName(), "",
|
String mockAlias = aliasGenerateHelper.generateAlias(mockType, metricReq.getName(), metricReq.getBizName(), "",
|
||||||
metricReq.getDescription(), !"".equals(metricReq.getDataFormatType()));
|
metricReq.getDescription());
|
||||||
String ret = mockAlias.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", "");
|
String ret = mockAlias.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", "");
|
||||||
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
|
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -6,97 +6,89 @@ import com.alibaba.fastjson.JSONException;
|
|||||||
import dev.langchain4j.data.message.AiMessage;
|
import dev.langchain4j.data.message.AiMessage;
|
||||||
import dev.langchain4j.data.message.SystemMessage;
|
import dev.langchain4j.data.message.SystemMessage;
|
||||||
import dev.langchain4j.model.chat.ChatLanguageModel;
|
import dev.langchain4j.model.chat.ChatLanguageModel;
|
||||||
|
import dev.langchain4j.model.input.Prompt;
|
||||||
|
import dev.langchain4j.model.input.PromptTemplate;
|
||||||
import dev.langchain4j.model.output.Response;
|
import dev.langchain4j.model.output.Response;
|
||||||
import dev.langchain4j.provider.ModelProvider;
|
import dev.langchain4j.provider.ModelProvider;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class AliasGenerateHelper {
|
public class AliasGenerateHelper {
|
||||||
|
|
||||||
public String getChatCompletion(String message) {
|
private static final Logger keyPipelineLog = LoggerFactory.getLogger("keyPipeline");
|
||||||
SystemMessage from = SystemMessage.from(message);
|
|
||||||
ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel();
|
private static final String NAME_ALIAS_INSTRUCTION = ""
|
||||||
Response<AiMessage> response = chatLanguageModel.generate(from);
|
+ "\n#Role: You are a professional data analyst specializing in metrics and dimensions."
|
||||||
log.info("message:{}\n response:{}", message, response);
|
+ "\n#Task: You will be provided with metadata about a metric or dimension, please help "
|
||||||
return response.content().text();
|
+ "generate a few aliases in the same language as its `fieldName`."
|
||||||
}
|
+ "\n#Rules:"
|
||||||
|
+ "1. Please do not generate aliases like xxx1, xxx2, xxx3."
|
||||||
|
+ "2. Please do not generate aliases that are the same as the original names of metrics/dimensions."
|
||||||
|
+ "3. Please pay attention to the quality of the generated aliases and "
|
||||||
|
+ "avoid creating aliases that look like test data."
|
||||||
|
+ "4. Please output as a json string array."
|
||||||
|
+ "\n#Metadata: {'table':'{{table}}', 'name':'{{name}}', 'type':'{{type}}', "
|
||||||
|
+ "'field':'{{field}}', 'description':'{{desc}}'}"
|
||||||
|
+ "\n#Output:";
|
||||||
|
|
||||||
|
private static final String VALUE_ALIAS_INSTRUCTION = ""
|
||||||
|
+ "\n#Role: You are a professional data analyst."
|
||||||
|
+ "\n#Task: You will be provided with a json array of dimension values,"
|
||||||
|
+ "please help generate a few aliases for each value."
|
||||||
|
+ "\n#Rule:"
|
||||||
|
+ "1. ALWAYS output json array for each value."
|
||||||
|
+ "2. The aliases should be in the same language as its original value."
|
||||||
|
+ "\n#Exemplar:"
|
||||||
|
+ "Values: [\\\"qq_music\\\",\\\"kugou_music\\\"], "
|
||||||
|
+ "Output: {\\\"tran\\\":[\\\"qq音乐\\\",\\\"酷狗音乐\\\"],"
|
||||||
|
+ " \\\"alias\\\":{\\\"qq_music\\\":[\\\"q音\\\",\\\"qq音乐\\\"],"
|
||||||
|
+ " \\\"kugou_music\\\":[\\\"kugou\\\",\\\"酷狗\\\"]}}"
|
||||||
|
+ "\nValues: {{values}}, Output:";
|
||||||
|
|
||||||
public String generateAlias(String mockType,
|
public String generateAlias(String mockType,
|
||||||
String name,
|
String name,
|
||||||
String bizName,
|
String bizName,
|
||||||
String table,
|
String table,
|
||||||
String desc,
|
String desc) {
|
||||||
Boolean isPercentage) {
|
Map<String, Object> variable = new HashMap<>();
|
||||||
String msg = "Assuming you are a professional data analyst specializing in metrics and dimensions, "
|
variable.put("table", table);
|
||||||
+ "you have a vast amount of data analysis metrics content. You are familiar with the basic"
|
variable.put("name", name);
|
||||||
+ " format of the content,Now, Construct your answer Based on the following json-schema.\n"
|
variable.put("field", bizName);
|
||||||
+ "{\n"
|
variable.put("type", mockType);
|
||||||
+ "\"$schema\": \"http://json-schema.org/draft-07/schema#\",\n"
|
variable.put("desc", desc);
|
||||||
+ "\"type\": \"array\",\n"
|
|
||||||
+ "\"minItems\": 2,\n"
|
Prompt prompt = PromptTemplate.from(NAME_ALIAS_INSTRUCTION).apply(variable);
|
||||||
+ "\"maxItems\": 4,\n"
|
keyPipelineLog.info("AliasGenerateHelper.generateNameAlias reqPrompt:{}", prompt.text());
|
||||||
+ "\"items\": {\n"
|
String response = getChatCompletion(prompt);
|
||||||
+ "\"type\": \"string\",\n"
|
keyPipelineLog.info("AliasGenerateHelper.generateNameAlias modelResp:{}", response);
|
||||||
+ "\"description\": \"Assuming you are a data analyst and give a defined "
|
return response;
|
||||||
+ mockType
|
|
||||||
+ " name: "
|
|
||||||
+ name + ","
|
|
||||||
+ "this "
|
|
||||||
+ mockType
|
|
||||||
+ " is from database and table: "
|
|
||||||
+ table + ",This "
|
|
||||||
+ mockType
|
|
||||||
+ " calculates the field source: "
|
|
||||||
+ bizName
|
|
||||||
+ ", The description of this metrics is: "
|
|
||||||
+ desc
|
|
||||||
+ ", provide some aliases for this, please take chinese or english,"
|
|
||||||
+ "You must adhere to the following rules:\n"
|
|
||||||
+ "1. Please do not generate aliases like xxx1, xxx2, xxx3.\n"
|
|
||||||
+ "2. Please do not generate aliases that are the same as the original names of metrics/dimensions.\n"
|
|
||||||
+ "3. Please pay attention to the quality of the generated aliases and "
|
|
||||||
+ " avoid creating aliases that look like test data.\n"
|
|
||||||
+ "4. Please generate more Chinese aliases."
|
|
||||||
+ "},\n"
|
|
||||||
+ "\"additionalProperties\":false}\n"
|
|
||||||
+ "Please double-check whether the answer conforms to the format described in the JSON-schema.\n"
|
|
||||||
+ "回答格式示例:"
|
|
||||||
+ "[\n"
|
|
||||||
+ " \"人数\",\n"
|
|
||||||
+ " \"员工人数\",\n"
|
|
||||||
+ " \"员工数量\",\n"
|
|
||||||
+ " \"员工总数\"\n"
|
|
||||||
+ "]\n"
|
|
||||||
+ "请严格按照示例格式进行生成。"
|
|
||||||
+ "ANSWER JSON:";
|
|
||||||
log.info("msg:{}", msg);
|
|
||||||
return getChatCompletion(msg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String generateDimensionValueAlias(String json) {
|
public String generateDimensionValueAlias(String json) {
|
||||||
String msg = "Assuming you are a professional data analyst specializing in indicators,for you a json list,"
|
Map<String, Object> variable = new HashMap<>();
|
||||||
+ "the required content to follow is as follows: \n"
|
variable.put("values", json);
|
||||||
+ "1. The format of JSON,\n"
|
|
||||||
+ "2. Only return in JSON format,\n"
|
Prompt prompt = PromptTemplate.from(VALUE_ALIAS_INSTRUCTION).apply(variable);
|
||||||
+ "3. the array item > 1 and < 5,more alias,\n"
|
keyPipelineLog.info("AliasGenerateHelper.generateValueAlias reqPrompt:{}", prompt.text());
|
||||||
+ "for example:\n"
|
String response = getChatCompletion(prompt);
|
||||||
+ "input:[\"qq_music\",\"kugou_music\"],\n"
|
keyPipelineLog.info("AliasGenerateHelper.generateValueAlias modelResp:{}", response);
|
||||||
+ "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"],"
|
|
||||||
+ "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n"
|
return response;
|
||||||
+ "input:[\"qq_music\",\"kugou_music\"],\n"
|
}
|
||||||
+ "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"],"
|
|
||||||
+ "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n"
|
private String getChatCompletion(Prompt prompt) {
|
||||||
+ "input:[\"大专\",\"本科\",\"硕士研究生\"],\n"
|
SystemMessage from = prompt.toSystemMessage();
|
||||||
+ "out:{\"tran\":[\"大专\",\"本科\",\"硕士研究生\"],"
|
ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel();
|
||||||
+ "\"alias\":{\"大专\":[\"专科\",\"大学专科\"],\"本科\":[\"学士\",\"本科生\"],\"硕士研究生\":[\"硕士\",\"研究生\"]}},\n"
|
Response<AiMessage> response = chatLanguageModel.generate(from);
|
||||||
+ "now input: "
|
return response.content().text();
|
||||||
+ json + ",\n"
|
|
||||||
+ "answer json:";
|
|
||||||
log.info("msg:{}", msg);
|
|
||||||
return getChatCompletion(msg);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) {
|
private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) {
|
||||||
|
|||||||
Reference in New Issue
Block a user