(improvement)(headless)Refactor the prompts for generating semantic aliases.

This commit is contained in:
jerryjzhang
2024-09-06 17:55:33 +08:00
parent 45777f0abc
commit ee15a88b06
4 changed files with 75 additions and 83 deletions

View File

@@ -26,19 +26,19 @@ import java.util.concurrent.ConcurrentHashMap;
public class OnePassSCSqlGenStrategy extends SqlGenStrategy { public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
private static final String INSTRUCTION = "" private static final String INSTRUCTION = ""
+ "#Role: You are a data analyst experienced in SQL languages.\n" + "\n#Role: You are a data analyst experienced in SQL languages."
+ "#Task: You will be provided a natural language question asked by users," + "#Task: You will be provided with a natural language question asked by users,"
+ "please convert it to a SQL query so that relevant data could be returned " + "please convert it to a SQL query so that relevant data could be returned "
+ "by executing the SQL query against underlying database.\n" + "by executing the SQL query against underlying database."
+ "#Rules:" + "\n#Rules:"
+ "1.ALWAYS generate column specified in the `Schema`, DO NOT hallucinate." + "1.ALWAYS generate column specified in the `Schema`, DO NOT hallucinate."
+ "2.ALWAYS specify date filter using `>`,`<`,`>=`,`<=` operator." + "2.ALWAYS specify date filter using `>`,`<`,`>=`,`<=` operator."
+ "3.ALWAYS calculate the absolute date range by yourself." + "3.ALWAYS calculate the absolute date range by yourself."
+ "4.DO NOT include date filter in the where clause if not explicitly expressed in the `Question`." + "4.DO NOT include date filter in the where clause if not explicitly expressed in the `Question`."
+ "5.DO NOT miss the AGGREGATE operator of metrics, always add it if needed." + "5.DO NOT miss the AGGREGATE operator of metrics, always add it if needed."
+ "6.ONLY respond with the converted SQL statement.\n" + "6.ONLY respond with the converted SQL statement."
+ "#Exemplars:\n{{exemplar}}" + "\n#Exemplars:\n{{exemplar}}"
+ "#Question:{{question}} #Schema:{{schema}} #SideInfo:{{information}} #SQL:"; + "Question:{{question}},Schema:{{schema}},SideInfo:{{information}},SQL:";
@Override @Override
public LLMResp generate(LLMReq llmReq) { public LLMResp generate(LLMReq llmReq) {
@@ -83,7 +83,7 @@ public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
private Prompt generatePrompt(LLMReq llmReq, LLMResp llmResp) { private Prompt generatePrompt(LLMReq llmReq, LLMResp llmResp) {
StringBuilder exemplars = new StringBuilder(); StringBuilder exemplars = new StringBuilder();
for (Text2SQLExemplar exemplar : llmReq.getDynamicExemplars()) { for (Text2SQLExemplar exemplar : llmReq.getDynamicExemplars()) {
String exemplarStr = String.format("#Question:%s #Schema:%s #SideInfo:%s #SQL:%s\n", String exemplarStr = String.format("Question:%s,Schema:%s,SideInfo:%s,SQL:%s\n",
exemplar.getQuestion(), exemplar.getDbSchema(), exemplar.getQuestion(), exemplar.getDbSchema(),
exemplar.getSideInfo(), exemplar.getSql()); exemplar.getSideInfo(), exemplar.getSql());
exemplars.append(exemplarStr); exemplars.append(exemplarStr);

View File

@@ -318,7 +318,7 @@ public class DimensionServiceImpl extends ServiceImpl<DimensionDOMapper, Dimensi
@Override @Override
public List<String> mockAlias(DimensionReq dimensionReq, String mockType, User user) { public List<String> mockAlias(DimensionReq dimensionReq, String mockType, User user) {
String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(), String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(),
dimensionReq.getBizName(), "", dimensionReq.getDescription(), false); dimensionReq.getBizName(), "", dimensionReq.getDescription());
String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias); String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias);
return JSONObject.parseObject(ret, new TypeReference<List<String>>() { return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
}); });

View File

@@ -511,7 +511,7 @@ public class MetricServiceImpl extends ServiceImpl<MetricDOMapper, MetricDO>
public List<String> mockAlias(MetricBaseReq metricReq, String mockType, User user) { public List<String> mockAlias(MetricBaseReq metricReq, String mockType, User user) {
String mockAlias = aliasGenerateHelper.generateAlias(mockType, metricReq.getName(), metricReq.getBizName(), "", String mockAlias = aliasGenerateHelper.generateAlias(mockType, metricReq.getName(), metricReq.getBizName(), "",
metricReq.getDescription(), !"".equals(metricReq.getDataFormatType())); metricReq.getDescription());
String ret = mockAlias.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", ""); String ret = mockAlias.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", "");
return JSONObject.parseObject(ret, new TypeReference<List<String>>() { return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
}); });

View File

@@ -6,97 +6,89 @@ import com.alibaba.fastjson.JSONException;
import dev.langchain4j.data.message.AiMessage; import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.SystemMessage; import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.model.chat.ChatLanguageModel; import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.input.Prompt;
import dev.langchain4j.model.input.PromptTemplate;
import dev.langchain4j.model.output.Response; import dev.langchain4j.model.output.Response;
import dev.langchain4j.provider.ModelProvider; import dev.langchain4j.provider.ModelProvider;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Map;
@Component @Component
@Slf4j @Slf4j
public class AliasGenerateHelper { public class AliasGenerateHelper {
public String getChatCompletion(String message) { private static final Logger keyPipelineLog = LoggerFactory.getLogger("keyPipeline");
SystemMessage from = SystemMessage.from(message);
ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel(); private static final String NAME_ALIAS_INSTRUCTION = ""
Response<AiMessage> response = chatLanguageModel.generate(from); + "\n#Role: You are a professional data analyst specializing in metrics and dimensions."
log.info("message:{}\n response:{}", message, response); + "\n#Task: You will be provided with metadata about a metric or dimension, please help "
return response.content().text(); + "generate a few aliases in the same language as its `fieldName`."
} + "\n#Rules:"
+ "1. Please do not generate aliases like xxx1, xxx2, xxx3."
+ "2. Please do not generate aliases that are the same as the original names of metrics/dimensions."
+ "3. Please pay attention to the quality of the generated aliases and "
+ "avoid creating aliases that look like test data."
+ "4. Please output as a json string array."
+ "\n#Metadata: {'table':'{{table}}', 'name':'{{name}}', 'type':'{{type}}', "
+ "'field':'field', 'description':'{{desc}}'}"
+ "\n#Output:";
private static final String VALUE_ALIAS_INSTRUCTION = ""
+ "\n#Role: You are a professional data analyst."
+ "\n#Task: You will be provided with a json array of dimension values,"
+ "please help generate a few aliases for each value."
+ "\n#Rule:"
+ "1. ALWAYS output json array for each value."
+ "2. The aliases should be in the same language as its original value."
+ "\n#Exemplar:"
+ "Values: [\\\"qq_music\\\",\\\"kugou_music\\\"], "
+ "Output: {\\\"tran\\\":[\\\"qq音乐\\\",\\\"酷狗音乐\\\"],"
+ " \\\"alias\\\":{\\\"qq_music\\\":[\\\"q音\\\",\\\"qq音乐\\\"],"
+ " \\\"kugou_music\\\":[\\\"kugou\\\",\\\"酷狗\\\"]}}"
+ "\nValues: {{values}}, Output:";
public String generateAlias(String mockType, public String generateAlias(String mockType,
String name, String name,
String bizName, String bizName,
String table, String table,
String desc, String desc) {
Boolean isPercentage) { Map<String, Object> variable = new HashMap<>();
String msg = "Assuming you are a professional data analyst specializing in metrics and dimensions, " variable.put("table", table);
+ "you have a vast amount of data analysis metrics content. You are familiar with the basic" variable.put("name", name);
+ " format of the content,Now, Construct your answer Based on the following json-schema.\n" variable.put("field", bizName);
+ "{\n" variable.put("type", mockType);
+ "\"$schema\": \"http://json-schema.org/draft-07/schema#\",\n" variable.put("desc", desc);
+ "\"type\": \"array\",\n"
+ "\"minItems\": 2,\n" Prompt prompt = PromptTemplate.from(NAME_ALIAS_INSTRUCTION).apply(variable);
+ "\"maxItems\": 4,\n" keyPipelineLog.info("AliasGenerateHelper.generateNameAlias reqPrompt:{}", prompt.text());
+ "\"items\": {\n" String response = getChatCompletion(prompt);
+ "\"type\": \"string\",\n" keyPipelineLog.info("AliasGenerateHelper.generateNameAlias modelResp:{}", response);
+ "\"description\": \"Assuming you are a data analyst and give a defined " return response;
+ mockType
+ " name: "
+ name + ","
+ "this "
+ mockType
+ " is from database and table: "
+ table + ",This "
+ mockType
+ " calculates the field source: "
+ bizName
+ ", The description of this metrics is: "
+ desc
+ ", provide some aliases for this, please take chinese or english,"
+ "You must adhere to the following rules:\n"
+ "1. Please do not generate aliases like xxx1, xxx2, xxx3.\n"
+ "2. Please do not generate aliases that are the same as the original names of metrics/dimensions.\n"
+ "3. Please pay attention to the quality of the generated aliases and "
+ " avoid creating aliases that look like test data.\n"
+ "4. Please generate more Chinese aliases."
+ "},\n"
+ "\"additionalProperties\":false}\n"
+ "Please double-check whether the answer conforms to the format described in the JSON-schema.\n"
+ "回答格式示例:"
+ "[\n"
+ " \"人数\",\n"
+ " \"员工人数\",\n"
+ " \"员工数量\",\n"
+ " \"员工总数\"\n"
+ "]\n"
+ "请严格按照示例格式进行生成。"
+ "ANSWER JSON:";
log.info("msg:{}", msg);
return getChatCompletion(msg);
} }
public String generateDimensionValueAlias(String json) { public String generateDimensionValueAlias(String json) {
String msg = "Assuming you are a professional data analyst specializing in indicators,for you a json list" Map<String, Object> variable = new HashMap<>();
+ "the required content to follow is as follows: \n" variable.put("values", json);
+ "1. The format of JSON,\n"
+ "2. Only return in JSON format,\n" Prompt prompt = PromptTemplate.from(VALUE_ALIAS_INSTRUCTION).apply(variable);
+ "3. the array item > 1 and < 5,more alias,\n" keyPipelineLog.info("AliasGenerateHelper.generateValueAlias reqPrompt:{}", prompt.text());
+ "for example\n" String response = getChatCompletion(prompt);
+ "input:[\"qq_music\",\"kugou_music\"],\n" keyPipelineLog.info("AliasGenerateHelper.generateValueAlias modelResp:{}", response);
+ "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"],"
+ "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n" return response;
+ "input:[\"qq_music\",\"kugou_music\"],\n" }
+ "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"],"
+ "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n" private String getChatCompletion(Prompt prompt) {
+ "input:[\"大专\",\"本科\",\"硕士研究生\"],\n" SystemMessage from = prompt.toSystemMessage();
+ "out:{\"tran\":[\"大专\",\"本科\",\"硕士研究生\"]," ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel();
+ "\"alias\":{\"大专\":[\"专科\",\"大学专科\"],\"本科\":[\"学士\",\"本科生\"],\"硕士研究生\":[\"硕士\",\"研究生\"]}},\n" Response<AiMessage> response = chatLanguageModel.generate(from);
+ "now input: " return response.content().text();
+ json + ",\n"
+ "answer json:";
log.info("msg:{}", msg);
return getChatCompletion(msg);
} }
private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) { private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) {