(improvement)(headless)Refactor the prompts for generating semantic aliases.

This commit is contained in:
jerryjzhang
2024-09-06 17:55:33 +08:00
parent 45777f0abc
commit ee15a88b06
4 changed files with 75 additions and 83 deletions

View File

@@ -26,19 +26,19 @@ import java.util.concurrent.ConcurrentHashMap;
public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
private static final String INSTRUCTION = ""
+ "#Role: You are a data analyst experienced in SQL languages.\n"
+ "#Task: You will be provided a natural language question asked by users,"
+ "\n#Role: You are a data analyst experienced in SQL languages."
+ "#Task: You will be provided with a natural language question asked by users,"
+ "please convert it to a SQL query so that relevant data could be returned "
+ "by executing the SQL query against underlying database.\n"
+ "#Rules:"
+ "by executing the SQL query against underlying database."
+ "\n#Rules:"
+ "1.ALWAYS generate column specified in the `Schema`, DO NOT hallucinate."
+ "2.ALWAYS specify date filter using `>`,`<`,`>=`,`<=` operator."
+ "3.ALWAYS calculate the absolute date range by yourself."
+ "4.DO NOT include date filter in the where clause if not explicitly expressed in the `Question`."
+ "5.DO NOT miss the AGGREGATE operator of metrics, always add it if needed."
+ "6.ONLY respond with the converted SQL statement.\n"
+ "#Exemplars:\n{{exemplar}}"
+ "#Question:{{question}} #Schema:{{schema}} #SideInfo:{{information}} #SQL:";
+ "6.ONLY respond with the converted SQL statement."
+ "\n#Exemplars:\n{{exemplar}}"
+ "Question:{{question}},Schema:{{schema}},SideInfo:{{information}},SQL:";
@Override
public LLMResp generate(LLMReq llmReq) {
@@ -83,7 +83,7 @@ public class OnePassSCSqlGenStrategy extends SqlGenStrategy {
private Prompt generatePrompt(LLMReq llmReq, LLMResp llmResp) {
StringBuilder exemplars = new StringBuilder();
for (Text2SQLExemplar exemplar : llmReq.getDynamicExemplars()) {
String exemplarStr = String.format("#Question:%s #Schema:%s #SideInfo:%s #SQL:%s\n",
String exemplarStr = String.format("Question:%s,Schema:%s,SideInfo:%s,SQL:%s\n",
exemplar.getQuestion(), exemplar.getDbSchema(),
exemplar.getSideInfo(), exemplar.getSql());
exemplars.append(exemplarStr);

View File

@@ -318,7 +318,7 @@ public class DimensionServiceImpl extends ServiceImpl<DimensionDOMapper, Dimensi
@Override
public List<String> mockAlias(DimensionReq dimensionReq, String mockType, User user) {
String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(),
dimensionReq.getBizName(), "", dimensionReq.getDescription(), false);
dimensionReq.getBizName(), "", dimensionReq.getDescription());
String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias);
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
});

View File

@@ -511,7 +511,7 @@ public class MetricServiceImpl extends ServiceImpl<MetricDOMapper, MetricDO>
public List<String> mockAlias(MetricBaseReq metricReq, String mockType, User user) {
String mockAlias = aliasGenerateHelper.generateAlias(mockType, metricReq.getName(), metricReq.getBizName(), "",
metricReq.getDescription(), !"".equals(metricReq.getDataFormatType()));
metricReq.getDescription());
String ret = mockAlias.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", "");
return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
});

View File

@@ -6,97 +6,89 @@ import com.alibaba.fastjson.JSONException;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.input.Prompt;
import dev.langchain4j.model.input.PromptTemplate;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.provider.ModelProvider;
import lombok.extern.slf4j.Slf4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Map;
@Component
@Slf4j
public class AliasGenerateHelper {
/**
 * Sends the given text to the chat model as a system message and returns the reply text.
 *
 * <p>The input message and the full model response are written to the class logger at INFO level.
 *
 * @param message text to wrap in a system message
 * @return the text content of the model's reply
 */
public String getChatCompletion(String message) {
    final SystemMessage systemMessage = SystemMessage.from(message);
    final Response<AiMessage> reply = ModelProvider.getChatModel().generate(systemMessage);
    log.info("message:{}\n response:{}", message, reply);
    return reply.content().text();
}
// Logger registered under the fixed name "keyPipeline" rather than this class's name; the alias
// generators in this class write their rendered prompts and the raw model responses to it.
private static final Logger keyPipelineLog = LoggerFactory.getLogger("keyPipeline");
/**
 * Prompt template that asks the model for aliases of a metric or dimension.
 *
 * <p>The template variables are {@code table}, {@code name}, {@code type}, {@code field} and
 * {@code desc}; they are rendered into the {@code #Metadata} line. The model is asked to answer
 * with a JSON string array.
 *
 * <p>NOTE(review): the task text refers to {@code fieldName}, but the metadata keys are
 * {@code name} and {@code field} - confirm which one the aliases should follow.
 */
private static final String NAME_ALIAS_INSTRUCTION = ""
        + "\n#Role: You are a professional data analyst specializing in metrics and dimensions."
        + "\n#Task: You will be provided with metadata about a metric or dimension, please help "
        + "generate a few aliases in the same language as its `fieldName`."
        + "\n#Rules:"
        + "1. Please do not generate aliases like xxx1, xxx2, xxx3."
        + "2. Please do not generate aliases that are the same as the original names of metrics/dimensions."
        + "3. Please pay attention to the quality of the generated aliases and "
        + "avoid creating aliases that look like test data."
        + "4. Please output as a json string array."
        + "\n#Metadata: {'table':'{{table}}', 'name':'{{name}}', 'type':'{{type}}', "
        // 'field' must be a placeholder; a literal 'field' drops the business name from the prompt.
        + "'field':'{{field}}', 'description':'{{desc}}'}"
        + "\n#Output:";
/**
 * Prompt template that asks the model for aliases of dimension values.
 *
 * <p>The single template variable is {@code values}. The exemplar shows an answer object with a
 * {@code tran} array and an {@code alias} map keyed by the original value.
 *
 * <p>NOTE(review): the exemplar's quotes are written as backslash-quote in the rendered prompt
 * (the Java literal is {@code \\\"}); confirm that the literal backslashes are intended.
 */
private static final String VALUE_ALIAS_INSTRUCTION =
        "\n#Role: You are a professional data analyst."
        + "\n#Task: You will be provided with a json array of "
        + "dimension values,please help generate a few aliases for each value."
        + "\n#Rule:1. ALWAYS output json array for each value."
        + "2. The aliases should be in the same language as its original value."
        + "\n#Exemplar:Values: [\\\"qq_music\\\",\\\"kugou_music\\\"], "
        + "Output: {\\\"tran\\\":[\\\"qq音乐\\\",\\\"酷狗音乐\\\"], "
        + "\\\"alias\\\":{\\\"qq_music\\\":[\\\"q音\\\",\\\"qq音乐\\\"], "
        + "\\\"kugou_music\\\":[\\\"kugou\\\",\\\"酷狗\\\"]}}"
        + "\nValues: {{values}}, Output:";
public String generateAlias(String mockType,
String name,
String bizName,
String table,
String desc,
Boolean isPercentage) {
String msg = "Assuming you are a professional data analyst specializing in metrics and dimensions, "
+ "you have a vast amount of data analysis metrics content. You are familiar with the basic"
+ " format of the content,Now, Construct your answer Based on the following json-schema.\n"
+ "{\n"
+ "\"$schema\": \"http://json-schema.org/draft-07/schema#\",\n"
+ "\"type\": \"array\",\n"
+ "\"minItems\": 2,\n"
+ "\"maxItems\": 4,\n"
+ "\"items\": {\n"
+ "\"type\": \"string\",\n"
+ "\"description\": \"Assuming you are a data analyst and give a defined "
+ mockType
+ " name: "
+ name + ","
+ "this "
+ mockType
+ " is from database and table: "
+ table + ",This "
+ mockType
+ " calculates the field source: "
+ bizName
+ ", The description of this metrics is: "
+ desc
+ ", provide some aliases for this, please take chinese or english,"
+ "You must adhere to the following rules:\n"
+ "1. Please do not generate aliases like xxx1, xxx2, xxx3.\n"
+ "2. Please do not generate aliases that are the same as the original names of metrics/dimensions.\n"
+ "3. Please pay attention to the quality of the generated aliases and "
+ " avoid creating aliases that look like test data.\n"
+ "4. Please generate more Chinese aliases."
+ "},\n"
+ "\"additionalProperties\":false}\n"
+ "Please double-check whether the answer conforms to the format described in the JSON-schema.\n"
+ "回答格式示例:"
+ "[\n"
+ " \"人数\",\n"
+ " \"员工人数\",\n"
+ " \"员工数量\",\n"
+ " \"员工总数\"\n"
+ "]\n"
+ "请严格按照示例格式进行生成。"
+ "ANSWER JSON:";
log.info("msg:{}", msg);
return getChatCompletion(msg);
String desc) {
Map<String, Object> variable = new HashMap<>();
variable.put("table", table);
variable.put("name", name);
variable.put("field", bizName);
variable.put("type", mockType);
variable.put("desc", desc);
Prompt prompt = PromptTemplate.from(NAME_ALIAS_INSTRUCTION).apply(variable);
keyPipelineLog.info("AliasGenerateHelper.generateNameAlias reqPrompt:{}", prompt.text());
String response = getChatCompletion(prompt);
keyPipelineLog.info("AliasGenerateHelper.generateNameAlias modelResp:{}", response);
return response;
}
/**
 * Asks the chat model for aliases of dimension values.
 *
 * <p>Renders {@code VALUE_ALIAS_INSTRUCTION} with {@code json} bound to the template variable
 * {@code values}, logs the rendered prompt and the raw model reply to the {@code keyPipeline}
 * logger, and returns the reply text unparsed.
 *
 * @param json the dimension values to alias; the prompt describes them as a JSON array
 * @return the raw text of the model response
 */
public String generateDimensionValueAlias(String json) {
    Map<String, Object> variable = new HashMap<>();
    variable.put("values", json);

    Prompt prompt = PromptTemplate.from(VALUE_ALIAS_INSTRUCTION).apply(variable);
    keyPipelineLog.info("AliasGenerateHelper.generateValueAlias reqPrompt:{}", prompt.text());

    String response = getChatCompletion(prompt);
    keyPipelineLog.info("AliasGenerateHelper.generateValueAlias modelResp:{}", response);
    return response;
}
/**
 * Sends the rendered prompt to the chat model as a system message.
 *
 * @param prompt the rendered prompt to send
 * @return the text content of the model's reply
 */
private String getChatCompletion(Prompt prompt) {
    final SystemMessage systemMessage = prompt.toSystemMessage();
    return ModelProvider.getChatModel()
            .generate(systemMessage)
            .content()
            .text();
}
private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) {