From ee15a88b06bed8fcd87923f271cc9ccea96f375f Mon Sep 17 00:00:00 2001 From: jerryjzhang Date: Fri, 6 Sep 2024 17:55:33 +0800 Subject: [PATCH] (improvement)(headless)Refactor the prompts for generating semantic aliases. --- .../parser/llm/OnePassSCSqlGenStrategy.java | 16 +- .../service/impl/DimensionServiceImpl.java | 2 +- .../service/impl/MetricServiceImpl.java | 2 +- .../server/utils/AliasGenerateHelper.java | 138 +++++++++--------- 4 files changed, 75 insertions(+), 83 deletions(-) diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/OnePassSCSqlGenStrategy.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/OnePassSCSqlGenStrategy.java index 22ac33a87..79145820c 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/OnePassSCSqlGenStrategy.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/OnePassSCSqlGenStrategy.java @@ -26,19 +26,19 @@ import java.util.concurrent.ConcurrentHashMap; public class OnePassSCSqlGenStrategy extends SqlGenStrategy { private static final String INSTRUCTION = "" - + "#Role: You are a data analyst experienced in SQL languages.\n" - + "#Task: You will be provided a natural language question asked by users," + + "\n#Role: You are a data analyst experienced in SQL languages." + + "\n#Task: You will be provided with a natural language question asked by users," + "please convert it to a SQL query so that relevant data could be returned " - + "by executing the SQL query against underlying database.\n" - + "#Rules:" + + "by executing the SQL query against underlying database." + + "\n#Rules:" + "1.ALWAYS generate column specified in the `Schema`, DO NOT hallucinate." + "2.ALWAYS specify date filter using `>`,`<`,`>=`,`<=` operator." + "3.ALWAYS calculate the absolute date range by yourself." + "4.DO NOT include date filter in the where clause if not explicitly expressed in the `Question`." 
+ "5.DO NOT miss the AGGREGATE operator of metrics, always add it if needed." - + "6.ONLY respond with the converted SQL statement.\n" - + "#Exemplars:\n{{exemplar}}" - + "#Question:{{question}} #Schema:{{schema}} #SideInfo:{{information}} #SQL:"; + + "6.ONLY respond with the converted SQL statement." + + "\n#Exemplars:\n{{exemplar}}" + + "Question:{{question}},Schema:{{schema}},SideInfo:{{information}},SQL:"; @Override public LLMResp generate(LLMReq llmReq) { @@ -83,7 +83,7 @@ public class OnePassSCSqlGenStrategy extends SqlGenStrategy { private Prompt generatePrompt(LLMReq llmReq, LLMResp llmResp) { StringBuilder exemplars = new StringBuilder(); for (Text2SQLExemplar exemplar : llmReq.getDynamicExemplars()) { - String exemplarStr = String.format("#Question:%s #Schema:%s #SideInfo:%s #SQL:%s\n", + String exemplarStr = String.format("Question:%s,Schema:%s,SideInfo:%s,SQL:%s\n", exemplar.getQuestion(), exemplar.getDbSchema(), exemplar.getSideInfo(), exemplar.getSql()); exemplars.append(exemplarStr); diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/DimensionServiceImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/DimensionServiceImpl.java index 07159689c..de068f584 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/DimensionServiceImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/DimensionServiceImpl.java @@ -318,7 +318,7 @@ public class DimensionServiceImpl extends ServiceImpl mockAlias(DimensionReq dimensionReq, String mockType, User user) { String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(), - dimensionReq.getBizName(), "", dimensionReq.getDescription(), false); + dimensionReq.getBizName(), "", dimensionReq.getDescription()); String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias); return JSONObject.parseObject(ret, new 
TypeReference>() { }); diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/MetricServiceImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/MetricServiceImpl.java index 2e733f11b..f0d5b4aa8 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/MetricServiceImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/MetricServiceImpl.java @@ -511,7 +511,7 @@ public class MetricServiceImpl extends ServiceImpl public List mockAlias(MetricBaseReq metricReq, String mockType, User user) { String mockAlias = aliasGenerateHelper.generateAlias(mockType, metricReq.getName(), metricReq.getBizName(), "", - metricReq.getDescription(), !"".equals(metricReq.getDataFormatType())); + metricReq.getDescription()); String ret = mockAlias.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", ""); return JSONObject.parseObject(ret, new TypeReference>() { }); diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/AliasGenerateHelper.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/AliasGenerateHelper.java index dd8aa2223..368269449 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/AliasGenerateHelper.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/AliasGenerateHelper.java @@ -6,97 +6,89 @@ import com.alibaba.fastjson.JSONException; import dev.langchain4j.data.message.AiMessage; import dev.langchain4j.data.message.SystemMessage; import dev.langchain4j.model.chat.ChatLanguageModel; +import dev.langchain4j.model.input.Prompt; +import dev.langchain4j.model.input.PromptTemplate; import dev.langchain4j.model.output.Response; import dev.langchain4j.provider.ModelProvider; import lombok.extern.slf4j.Slf4j; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
org.springframework.stereotype.Component; +import java.util.HashMap; +import java.util.Map; + @Component @Slf4j public class AliasGenerateHelper { - public String getChatCompletion(String message) { - SystemMessage from = SystemMessage.from(message); - ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel(); - Response response = chatLanguageModel.generate(from); - log.info("message:{}\n response:{}", message, response); - return response.content().text(); - } + private static final Logger keyPipelineLog = LoggerFactory.getLogger("keyPipeline"); + + private static final String NAME_ALIAS_INSTRUCTION = "" + + "\n#Role: You are a professional data analyst specializing in metrics and dimensions." + + "\n#Task: You will be provided with metadata about a metric or dimension, please help " + + "generate a few aliases in the same language as its `fieldName`." + + "\n#Rules:" + + "1. Please do not generate aliases like xxx1, xxx2, xxx3." + + "2. Please do not generate aliases that are the same as the original names of metrics/dimensions." + + "3. Please pay attention to the quality of the generated aliases and " + + "avoid creating aliases that look like test data." + + "4. Please output as a json string array." + + "\n#Metadata: {'table':'{{table}}', 'name':'{{name}}', 'type':'{{type}}', " + + "'field':'{{field}}', 'description':'{{desc}}'}" + + "\n#Output:"; + + private static final String VALUE_ALIAS_INSTRUCTION = "" + + "\n#Role: You are a professional data analyst." + + "\n#Task: You will be provided with a json array of dimension values," + + "please help generate a few aliases for each value." + + "\n#Rule:" + + "1. ALWAYS output json array for each value." + + "2. The aliases should be in the same language as its original value." 
+ + "\n#Exemplar:" + + "Values: [\\\"qq_music\\\",\\\"kugou_music\\\"], " + + "Output: {\\\"tran\\\":[\\\"qq音乐\\\",\\\"酷狗音乐\\\"]," + + " \\\"alias\\\":{\\\"qq_music\\\":[\\\"q音\\\",\\\"qq音乐\\\"]," + + " \\\"kugou_music\\\":[\\\"kugou\\\",\\\"酷狗\\\"]}}" + + "\nValues: {{values}}, Output:"; public String generateAlias(String mockType, String name, String bizName, String table, - String desc, - Boolean isPercentage) { - String msg = "Assuming you are a professional data analyst specializing in metrics and dimensions, " - + "you have a vast amount of data analysis metrics content. You are familiar with the basic" - + " format of the content,Now, Construct your answer Based on the following json-schema.\n" - + "{\n" - + "\"$schema\": \"http://json-schema.org/draft-07/schema#\",\n" - + "\"type\": \"array\",\n" - + "\"minItems\": 2,\n" - + "\"maxItems\": 4,\n" - + "\"items\": {\n" - + "\"type\": \"string\",\n" - + "\"description\": \"Assuming you are a data analyst and give a defined " - + mockType - + " name: " - + name + "," - + "this " - + mockType - + " is from database and table: " - + table + ",This " - + mockType - + " calculates the field source: " - + bizName - + ", The description of this metrics is: " - + desc - + ", provide some aliases for this, please take chinese or english," - + "You must adhere to the following rules:\n" - + "1. Please do not generate aliases like xxx1, xxx2, xxx3.\n" - + "2. Please do not generate aliases that are the same as the original names of metrics/dimensions.\n" - + "3. Please pay attention to the quality of the generated aliases and " - + " avoid creating aliases that look like test data.\n" - + "4. Please generate more Chinese aliases." 
- + "},\n" - + "\"additionalProperties\":false}\n" - + "Please double-check whether the answer conforms to the format described in the JSON-schema.\n" - + "回答格式示例:" - + "[\n" - + " \"人数\",\n" - + " \"员工人数\",\n" - + " \"员工数量\",\n" - + " \"员工总数\"\n" - + "]\n" - + "请严格按照示例格式进行生成。" - + "ANSWER JSON:"; - log.info("msg:{}", msg); - return getChatCompletion(msg); + String desc) { + Map variable = new HashMap<>(); + variable.put("table", table); + variable.put("name", name); + variable.put("field", bizName); + variable.put("type", mockType); + variable.put("desc", desc); + + Prompt prompt = PromptTemplate.from(NAME_ALIAS_INSTRUCTION).apply(variable); + keyPipelineLog.info("AliasGenerateHelper.generateNameAlias reqPrompt:{}", prompt.text()); + String response = getChatCompletion(prompt); + keyPipelineLog.info("AliasGenerateHelper.generateNameAlias modelResp:{}", response); + return response; } public String generateDimensionValueAlias(String json) { - String msg = "Assuming you are a professional data analyst specializing in indicators,for you a json list," - + "the required content to follow is as follows: \n" - + "1. The format of JSON,\n" - + "2. Only return in JSON format,\n" - + "3. 
the array item > 1 and < 5,more alias,\n" - + "for example:\n" - + "input:[\"qq_music\",\"kugou_music\"],\n" - + "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"]," - + "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n" - + "input:[\"qq_music\",\"kugou_music\"],\n" - + "out:{\"tran\":[\"qq音乐\",\"酷狗音乐\"]," - + "\"alias\":{\"qq_music\":[\"q音\",\"qq音乐\"],\"kugou_music\":[\"kugou\",\"酷狗\"]}},\n" - + "input:[\"大专\",\"本科\",\"硕士研究生\"],\n" - + "out:{\"tran\":[\"大专\",\"本科\",\"硕士研究生\"]," - + "\"alias\":{\"大专\":[\"专科\",\"大学专科\"],\"本科\":[\"学士\",\"本科生\"],\"硕士研究生\":[\"硕士\",\"研究生\"]}},\n" - + "now input: " - + json + ",\n" - + "answer json:"; - log.info("msg:{}", msg); - return getChatCompletion(msg); + Map variable = new HashMap<>(); + variable.put("values", json); + + Prompt prompt = PromptTemplate.from(VALUE_ALIAS_INSTRUCTION).apply(variable); + keyPipelineLog.info("AliasGenerateHelper.generateValueAlias reqPrompt:{}", prompt.text()); + String response = getChatCompletion(prompt); + keyPipelineLog.info("AliasGenerateHelper.generateValueAlias modelResp:{}", response); + + return response; + } + + private String getChatCompletion(Prompt prompt) { + SystemMessage from = prompt.toSystemMessage(); + ChatLanguageModel chatLanguageModel = ModelProvider.getChatModel(); + Response response = chatLanguageModel.generate(from); + return response.content().text(); } private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) {