From 008c1c35d8956d48764e2305bcb8a060c148a99b Mon Sep 17 00:00:00 2001 From: jerryjzhang Date: Wed, 5 Jun 2024 10:33:04 +0800 Subject: [PATCH] (improvement)(headless)Commit new impl of SqlGenStrategy --- .../parser/llm/OnePassSCSqlGenStrategy.java | 98 ++++++----- .../core/chat/parser/llm/PromptGenerator.java | 156 ------------------ .../core/chat/parser/llm/PromptHelper.java | 97 +++++++++++ .../core/chat/parser/llm/SqlGenStrategy.java | 9 +- .../parser/llm/TwoPassSCSqlGenStrategy.java | 78 ++++++--- .../core/chat/query/llm/s2sql/LLMReq.java | 2 +- .../headless/core/config/ParserConfig.java | 12 +- .../s2sql_exemplar3_transformed.json | 62 +++---- .../src/main/resources/application-local.yaml | 2 +- .../src/main/resources/s2ql_exemplar.json | 62 +++---- .../src/test/resources/application-local.yaml | 2 +- .../src/test/resources/s2ql_exemplar.json | 62 +++---- 12 files changed, 318 insertions(+), 324 deletions(-) delete mode 100644 headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptGenerator.java create mode 100644 headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptHelper.java diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/OnePassSCSqlGenStrategy.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/OnePassSCSqlGenStrategy.java index 315febb79..a3b74731d 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/OnePassSCSqlGenStrategy.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/OnePassSCSqlGenStrategy.java @@ -1,6 +1,6 @@ package com.tencent.supersonic.headless.core.chat.parser.llm; -import com.tencent.supersonic.common.util.JsonUtil; +import com.google.common.collect.Lists; import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq; import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp; import dev.langchain4j.data.message.AiMessage; @@ -15,12 +15,7 @@ import org.springframework.stereotype.Service; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.stream.Collectors; - -import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_EXEMPLAR_RECALL_NUMBER; -import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_FEW_SHOT_NUMBER; -import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_SELF_CONSISTENCY_NUMBER; +import java.util.concurrent.ConcurrentHashMap; @Service @@ -29,46 +24,75 @@ public class OnePassSCSqlGenStrategy extends SqlGenStrategy { @Override public LLMResp generate(LLMReq llmReq) { - //1.retriever sqlExamples and generate exampleListPool - keyPipelineLog.info("OnePassSCSqlGenStrategy llmReq:{}", llmReq); + //1.recall exemplars + keyPipelineLog.info("OnePassSCSqlGenStrategy llmReq:\n{}", llmReq); + List>> exemplarsList = promptHelper.getFewShotExemplars(llmReq); - int exemplarRecallNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_EXEMPLAR_RECALL_NUMBER)); - int fewShotNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_FEW_SHOT_NUMBER)); - int selfConsistencyNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_SELF_CONSISTENCY_NUMBER)); + //2.generate sql generation prompt for each self-consistency inference + Map>> prompt2Exemplar = new HashMap<>(); + for (List> exemplars : exemplarsList) { + Prompt prompt = generatePrompt(llmReq, exemplars); + prompt2Exemplar.put(prompt, exemplars); + } - List> sqlExamples = exemplarManager.recallExemplars(llmReq.getQueryText(), - exemplarRecallNumber); - List>> exampleListPool = promptGenerator.getExampleCombos(sqlExamples, - fewShotNumber, selfConsistencyNumber); - - //2.generator linking and sql prompt by sqlExamples,and parallel generate response. - List linkingSqlPromptPool = promptGenerator.generatePromptPool(llmReq, exampleListPool, true); - List llmResults = new CopyOnWriteArrayList<>(); - linkingSqlPromptPool.parallelStream().forEach(linkingSqlPrompt -> { - Prompt prompt = PromptTemplate.from(JsonUtil.toString(linkingSqlPrompt)) - .apply(new HashMap<>()); - keyPipelineLog.info("OnePassSCSqlGenStrategy reqPrompt:{}", prompt.toSystemMessage()); - ChatLanguageModel chatLanguageModel = getChatLanguageModel(llmReq.getLlmConfig()); - Response response = chatLanguageModel.generate(prompt.toSystemMessage()); + //3.perform multiple self-consistency inferences parallelly + Map prompt2Output = new ConcurrentHashMap<>(); + prompt2Exemplar.keySet().parallelStream().forEach(prompt -> { + keyPipelineLog.info("OnePassSCSqlGenStrategy reqPrompt:\n{}", prompt.toSystemMessage()); + ChatLanguageModel chatLanguageModel = getChatLanguageModel(llmReq.getLlmConfig()); + Response response = chatLanguageModel.generate(prompt.toSystemMessage()); String result = response.content().text(); - llmResults.add(result); - keyPipelineLog.info("OnePassSCSqlGenStrategy modelResp:{}", result); + prompt2Output.put(prompt, result); + keyPipelineLog.info("OnePassSCSqlGenStrategy modelResp:\n{}", result); } ); - //3.format response. - List sqlList = llmResults.stream() - .map(OutputFormat::getSql).collect(Collectors.toList()); - Pair> sqlMapPair = OutputFormat.selfConsistencyVote(sqlList); + //4.format response. + Pair> sqlMapPair = OutputFormat.selfConsistencyVote( + Lists.newArrayList(prompt2Output.values())); + LLMResp llmResp = new LLMResp(); + llmResp.setQuery(llmReq.getQueryText()); + //TODO: should use the same few-shot exemplars as the one chose by self-consistency vote + llmResp.setSqlRespMap(OutputFormat.buildSqlRespMap(exemplarsList.get(0), sqlMapPair.getRight())); - LLMResp result = new LLMResp(); - result.setQuery(llmReq.getQueryText()); - result.setSqlRespMap(OutputFormat.buildSqlRespMap(sqlExamples, sqlMapPair.getRight())); - return result; + return llmResp; + } + + private Prompt generatePrompt(LLMReq llmReq, List> fewshotExampleList) { + String instruction = "" + + "#Role: You are a data analyst experienced in SQL languages.\n" + + "#Task: You will be provided a natural language query asked by business users," + + "please convert it to a SQL query so that relevant answer could be returned to the user " + + "by executing the SQL query against underlying database.\n" + + "#Rules:\n" + + "1.Always use `数据日期` as the date field.\n" + + "2.Always use `datediff` function to calculate date range.\n" + + "3.Only output SQL statement.\n" + + "#Exemplars:\n%s" + + "#UserQuery: %s " + + "#DatabaseMetadata: %s " + + "#SQL: "; + + StringBuilder exemplarsStr = new StringBuilder(); + for (Map example : fewshotExampleList) { + String metadata = example.get("dbSchema"); + String question = example.get("questionAugmented"); + String sql = example.get("sql"); + String exemplarStr = String.format("#UserQuery: %s #DatabaseMetadata: %s #SQL: %s\n", + question, metadata, sql); + exemplarsStr.append(exemplarStr); + } + + Pair questionPrompt = promptHelper.transformQuestionPrompt(llmReq); + String dbSchema = questionPrompt.getLeft(); + String questionAugmented = questionPrompt.getRight(); + String promptStr = String.format(instruction, exemplarsStr, questionAugmented, dbSchema); + + return PromptTemplate.from(promptStr).apply(new HashMap<>()); } @Override public void afterPropertiesSet() { - SqlGenStrategyFactory.addSqlGenerationForFactory(LLMReq.SqlGenType.ONE_PASS_AUTO_COT_SELF_CONSISTENCY, this); + SqlGenStrategyFactory.addSqlGenerationForFactory(LLMReq.SqlGenType.ONE_PASS_SELF_CONSISTENCY, this); } } diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptGenerator.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptGenerator.java deleted file mode 100644 index c5381d7bf..000000000 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptGenerator.java +++ /dev/null @@ -1,156 +0,0 @@ -package com.tencent.supersonic.headless.core.chat.parser.llm; - -import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq; -import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq.ElementValue; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.springframework.stereotype.Component; -import org.springframework.util.CollectionUtils; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -@Component -@Slf4j -public class PromptGenerator { - - public String generatorLinkingAndSqlPrompt(LLMReq llmReq, List> exampleList) { - String instruction = - "# Find the schema_links for generating SQL queries for each question based on the database schema " - + "and Foreign keys. Then use the the schema links to generate the " - + "SQL queries for each of the questions."; - - List exampleKeys = Arrays.asList("questionAugmented", "dbSchema", "generatedSchemaLinkingCoT", "sql"); - String exampleTemplate = "dbSchema\nQ: questionAugmented\nA: generatedSchemaLinkingCoT\nSQL: sql"; - - String exampleFormat = InputFormat.format(exampleTemplate, exampleKeys, exampleList); - - Pair questionPrompt = transformQuestionPrompt(llmReq); - String dbSchema = questionPrompt.getLeft(); - String questionAugmented = questionPrompt.getRight(); - - String newCaseTemplate = "%s\nQ: %s\nA: Let’s think step by step. In the question \"%s\", we are asked:"; - String newCasePrompt = String.format(newCaseTemplate, dbSchema, questionAugmented, questionAugmented); - - return instruction + InputFormat.SEPERATOR + exampleFormat + InputFormat.SEPERATOR + newCasePrompt; - } - - public String generateLinkingPrompt(LLMReq llmReq, List> exampleList) { - String instruction = "# Find the schema_links for generating SQL queries for each question " - + "based on the database schema and Foreign keys."; - - List exampleKeys = Arrays.asList("questionAugmented", "dbSchema", "generatedSchemaLinkingCoT"); - String exampleTemplate = "dbSchema\nQ: questionAugmented\nA: generatedSchemaLinkingCoT"; - String exampleFormat = InputFormat.format(exampleTemplate, exampleKeys, exampleList); - - Pair questionPrompt = transformQuestionPrompt(llmReq); - String dbSchema = questionPrompt.getLeft(); - String questionAugmented = questionPrompt.getRight(); - String newCaseTemplate = "%s\nQ: %s\nA: Let’s think step by step. In the question \"%s\", we are asked:"; - String newCasePrompt = String.format(newCaseTemplate, dbSchema, questionAugmented, questionAugmented); - - return instruction + InputFormat.SEPERATOR + exampleFormat + InputFormat.SEPERATOR + newCasePrompt; - } - - public String generateSqlPrompt(LLMReq llmReq, String schemaLinkStr, List> fewshotExampleList) { - String instruction = "# Use the the schema links to generate the SQL queries for each of the questions."; - List exampleKeys = Arrays.asList("questionAugmented", "dbSchema", "generatedSchemaLinkings", "sql"); - String exampleTemplate = "dbSchema\nQ: questionAugmented\n" + "Schema_links: generatedSchemaLinkings\n" - + "SQL: sql"; - - String schemaLinkingPrompt = InputFormat.format(exampleTemplate, exampleKeys, fewshotExampleList); - Pair questionPrompt = transformQuestionPrompt(llmReq); - String dbSchema = questionPrompt.getLeft(); - String questionAugmented = questionPrompt.getRight(); - String newCaseTemplate = "%s\nQ: %s\nSchema_links: %s\nSQL: "; - String newCasePrompt = String.format(newCaseTemplate, dbSchema, questionAugmented, schemaLinkStr); - return instruction + InputFormat.SEPERATOR + schemaLinkingPrompt + InputFormat.SEPERATOR + newCasePrompt; - } - - public List generatePromptPool(LLMReq llmReq, List>> exampleListPool, - boolean isSqlPrompt) { - List promptPool = new ArrayList<>(); - for (List> exampleList : exampleListPool) { - String prompt; - if (isSqlPrompt) { - prompt = generatorLinkingAndSqlPrompt(llmReq, exampleList); - } else { - prompt = generateLinkingPrompt(llmReq, exampleList); - } - promptPool.add(prompt); - } - return promptPool; - } - - public List>> getExampleCombos(List> exampleList, int numFewShots, - int numSelfConsistency) { - List>> results = new ArrayList<>(); - for (int i = 0; i < numSelfConsistency; i++) { - List> shuffledList = new ArrayList<>(exampleList); - Collections.shuffle(shuffledList); - results.add(shuffledList.subList(0, numFewShots)); - } - return results; - } - - public Pair transformQuestionPrompt(LLMReq llmReq) { - String modelName = llmReq.getSchema().getDataSetName(); - List fieldNameList = llmReq.getSchema().getFieldNameList(); - List linking = llmReq.getLinking(); - String currentDate = llmReq.getCurrentDate(); - String priorExts = llmReq.getPriorExts(); - - String dbSchema = "Table: " + modelName + ", Columns = " + fieldNameList + "\nForeign_keys: []"; - - List priorLinkingList = new ArrayList<>(); - for (ElementValue priorLinking : linking) { - String fieldName = priorLinking.getFieldName(); - String fieldValue = priorLinking.getFieldValue(); - priorLinkingList.add("‘" + fieldValue + "‘是一个‘" + fieldName + "‘"); - } - String currentDataStr = "当前的日期是" + currentDate; - String linkingListStr = String.join(",", priorLinkingList); - String termStr = getTermStr(llmReq); - String questionAugmented = String.format("%s (补充信息:%s . %s . %s) (备注: %s)", llmReq.getQueryText(), - linkingListStr, currentDataStr, termStr, priorExts); - return Pair.of(dbSchema, questionAugmented); - } - - private String getTermStr(LLMReq llmReq) { - List terms = llmReq.getSchema().getTerms(); - StringBuilder termsDesc = new StringBuilder(); - if (!CollectionUtils.isEmpty(terms)) { - termsDesc.append("相关业务术语:"); - for (int idx = 0; idx < terms.size(); idx++) { - LLMReq.Term term = terms.get(idx); - String name = term.getName(); - String description = term.getDescription(); - List alias = term.getAlias(); - String descPart = StringUtils.isBlank(description) ? "" : String.format(",它通常是指<%s>", description); - String aliasPart = CollectionUtils.isEmpty(alias) ? "" : String.format(",类似的表达还有%s", alias); - termsDesc.append(String.format("%d.<%s>是业务术语%s%s;", idx + 1, name, descPart, aliasPart)); - } - if (termsDesc.length() > 0) { - termsDesc.setLength(termsDesc.length() - 1); - } - } - return termsDesc.toString(); - } - - public List generateSqlPromptPool(LLMReq llmReq, List schemaLinkStrPool, - List>> fewshotExampleListPool) { - List sqlPromptPool = new ArrayList<>(); - for (int i = 0; i < schemaLinkStrPool.size(); i++) { - String schemaLinkStr = schemaLinkStrPool.get(i); - List> fewshotExampleList = fewshotExampleListPool.get(i); - String sqlPrompt = generateSqlPrompt(llmReq, schemaLinkStr, fewshotExampleList); - sqlPromptPool.add(sqlPrompt); - } - return sqlPromptPool; - } - -} diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptHelper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptHelper.java new file mode 100644 index 000000000..262309487 --- /dev/null +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/PromptHelper.java @@ -0,0 +1,97 @@ +package com.tencent.supersonic.headless.core.chat.parser.llm; + +import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq; +import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq.ElementValue; +import com.tencent.supersonic.headless.core.config.ParserConfig; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; +import org.springframework.util.CollectionUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_EXEMPLAR_RECALL_NUMBER; +import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_FEW_SHOT_NUMBER; +import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_SELF_CONSISTENCY_NUMBER; + +@Component +@Slf4j +public class PromptHelper { + + @Autowired + private ParserConfig parserConfig; + + @Autowired + private ExemplarManager exemplarManager; + + public List>> getFewShotExemplars(LLMReq llmReq) { + int exemplarRecallNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_EXEMPLAR_RECALL_NUMBER)); + int fewShotNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_FEW_SHOT_NUMBER)); + int selfConsistencyNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_SELF_CONSISTENCY_NUMBER)); + + List> exemplars = exemplarManager.recallExemplars(llmReq.getQueryText(), + exemplarRecallNumber); + List>> results = new ArrayList<>(); + + // use random collection of exemplars for each self-consistency inference + for (int i = 0; i < selfConsistencyNumber; i++) { + List> shuffledList = new ArrayList<>(exemplars); + Collections.shuffle(shuffledList); + results.add(shuffledList.subList(0, fewShotNumber)); + } + + return results; + } + + public Pair transformQuestionPrompt(LLMReq llmReq) { + String tableName = llmReq.getSchema().getDataSetName(); + List fieldNameList = llmReq.getSchema().getFieldNameList(); + List linkedValues = llmReq.getLinking(); + String currentDate = llmReq.getCurrentDate(); + String priorExts = llmReq.getPriorExts(); + + String dbSchema = "Table: " + tableName + ", Columns = " + fieldNameList; + + List priorLinkingList = new ArrayList<>(); + for (ElementValue value : linkedValues) { + String fieldName = value.getFieldName(); + String fieldValue = value.getFieldValue(); + priorLinkingList.add("‘" + fieldValue + "‘是一个‘" + fieldName + "‘"); + } + String currentDataStr = "current date is " + currentDate; + String linkingListStr = String.join(",", priorLinkingList); + String termStr = getTermStr(llmReq); + String questionAugmented = String.format("%s (补充信息:%s . %s . %s) (备注: %s)", llmReq.getQueryText(), + linkingListStr, currentDataStr, termStr, priorExts); + + return Pair.of(dbSchema, questionAugmented); + } + + private String getTermStr(LLMReq llmReq) { + List terms = llmReq.getSchema().getTerms(); + StringBuilder termsDesc = new StringBuilder(); + if (!CollectionUtils.isEmpty(terms)) { + termsDesc.append("相关业务术语:"); + for (int idx = 0; idx < terms.size(); idx++) { + LLMReq.Term term = terms.get(idx); + String name = term.getName(); + String description = term.getDescription(); + List alias = term.getAlias(); + String descPart = StringUtils.isBlank(description) ? "" : String.format(",它通常是指<%s>", description); + String aliasPart = CollectionUtils.isEmpty(alias) ? "" : String.format(",类似的表达还有%s", alias); + termsDesc.append(String.format("%d.<%s>是业务术语%s%s;", idx + 1, name, descPart, aliasPart)); + } + if (termsDesc.length() > 0) { + termsDesc.setLength(termsDesc.length() - 1); + } + } + + return termsDesc.toString(); + } + +} diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/SqlGenStrategy.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/SqlGenStrategy.java index 02dd8dbfa..1ef10b3b4 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/SqlGenStrategy.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/SqlGenStrategy.java @@ -3,7 +3,6 @@ package com.tencent.supersonic.headless.core.chat.parser.llm; import com.tencent.supersonic.headless.api.pojo.LLMConfig; import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMReq; import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMResp; -import com.tencent.supersonic.headless.core.config.ParserConfig; import com.tencent.supersonic.headless.core.utils.S2ChatModelProvider; import dev.langchain4j.model.chat.ChatLanguageModel; import org.slf4j.Logger; @@ -22,13 +21,7 @@ public abstract class SqlGenStrategy implements InitializingBean { protected static final Logger keyPipelineLog = LoggerFactory.getLogger("keyPipeline"); @Autowired - protected ExemplarManager exemplarManager; - - @Autowired - protected ParserConfig parserConfig; - - @Autowired - protected PromptGenerator promptGenerator; + protected PromptHelper promptHelper; protected ChatLanguageModel getChatLanguageModel(LLMConfig llmConfig) { return S2ChatModelProvider.provide(llmConfig); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/TwoPassSCSqlGenStrategy.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/TwoPassSCSqlGenStrategy.java index 91d2616bf..4126f3682 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/TwoPassSCSqlGenStrategy.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/TwoPassSCSqlGenStrategy.java @@ -12,34 +12,31 @@ import dev.langchain4j.model.output.Response; import org.apache.commons.lang3.tuple.Pair; import org.springframework.stereotype.Service; +import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.CopyOnWriteArrayList; -import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_EXEMPLAR_RECALL_NUMBER; -import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_FEW_SHOT_NUMBER; -import static com.tencent.supersonic.headless.core.config.ParserConfig.PARSER_SELF_CONSISTENCY_NUMBER; - @Service +@Deprecated public class TwoPassSCSqlGenStrategy extends SqlGenStrategy { @Override public LLMResp generate(LLMReq llmReq) { - //1.retriever sqlExamples and generate exampleListPool + //1.recall exemplars keyPipelineLog.info("TwoPassSCSqlGenStrategy llmReq:{}", llmReq); - int exemplarRecallNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_EXEMPLAR_RECALL_NUMBER)); - int fewShotNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_FEW_SHOT_NUMBER)); - int selfConsistencyNumber = Integer.valueOf(parserConfig.getParameterValue(PARSER_SELF_CONSISTENCY_NUMBER)); + List>> exampleListPool = promptHelper.getFewShotExemplars(llmReq); - List> sqlExamples = exemplarManager.recallExemplars(llmReq.getQueryText(), - exemplarRecallNumber); - List>> exampleListPool = promptGenerator.getExampleCombos(sqlExamples, - fewShotNumber, selfConsistencyNumber); + //2.generate schema linking prompt for each self-consistency inference + List linkingPromptPool = new ArrayList<>(); + for (List> exampleList : exampleListPool) { + String prompt = generateLinkingPrompt(llmReq, exampleList); + linkingPromptPool.add(prompt); + } - //2.generator linking prompt,and parallel generate response. - List linkingPromptPool = promptGenerator.generatePromptPool(llmReq, exampleListPool, false); List linkingResults = new CopyOnWriteArrayList<>(); ChatLanguageModel chatLanguageModel = getChatLanguageModel(llmReq.getLlmConfig()); linkingPromptPool.parallelStream().forEach( @@ -53,8 +50,17 @@ public class TwoPassSCSqlGenStrategy extends SqlGenStrategy { } ); List sortedList = OutputFormat.formatList(linkingResults); - //3.generator sql prompt,and parallel generate response. - List sqlPromptPool = promptGenerator.generateSqlPromptPool(llmReq, sortedList, exampleListPool); + + //3.generate sql generation prompt for each self-consistency inference + List sqlPromptPool = new ArrayList<>(); + for (int i = 0; i < sortedList.size(); i++) { + String schemaLinkStr = sortedList.get(i); + List> fewshotExampleList = exampleListPool.get(i); + String sqlPrompt = generateSqlPrompt(llmReq, schemaLinkStr, fewshotExampleList); + sqlPromptPool.add(sqlPrompt); + } + + //4.perform multiple self-consistency inferences parallelly List sqlTaskPool = new CopyOnWriteArrayList<>(); sqlPromptPool.parallelStream().forEach(sqlPrompt -> { Prompt linkingPrompt = PromptTemplate.from(JsonUtil.toString(sqlPrompt)).apply(new HashMap<>()); @@ -64,15 +70,49 @@ public class TwoPassSCSqlGenStrategy extends SqlGenStrategy { keyPipelineLog.info("TwoPassSCSqlGenStrategy step two modelResp:{}", result); sqlTaskPool.add(result); }); - //4.format response. - Pair> sqlMapPair = OutputFormat.selfConsistencyVote(sqlTaskPool); + //5.format response. + Pair> sqlMapPair = OutputFormat.selfConsistencyVote(sqlTaskPool); LLMResp llmResp = new LLMResp(); llmResp.setQuery(llmReq.getQueryText()); - llmResp.setSqlRespMap(OutputFormat.buildSqlRespMap(sqlExamples, sqlMapPair.getRight())); + //TODO: should use the same few-shot exemplars as the one chose by self-consistency vote + llmResp.setSqlRespMap(OutputFormat.buildSqlRespMap(exampleListPool.get(0), sqlMapPair.getRight())); return llmResp; } + private String generateLinkingPrompt(LLMReq llmReq, List> exampleList) { + String instruction = "# Find the schema_links for generating SQL queries for each question " + + "based on the database schema and Foreign keys."; + + List exampleKeys = Arrays.asList("questionAugmented", "dbSchema", "generatedSchemaLinkingCoT"); + String exampleTemplate = "dbSchema\nQ: questionAugmented\nA: generatedSchemaLinkingCoT"; + String exampleFormat = InputFormat.format(exampleTemplate, exampleKeys, exampleList); + + Pair questionPrompt = promptHelper.transformQuestionPrompt(llmReq); + String dbSchema = questionPrompt.getLeft(); + String questionAugmented = questionPrompt.getRight(); + String newCaseTemplate = "%s\nQ: %s\nA: Let’s think step by step. In the question \"%s\", we are asked:"; + String newCasePrompt = String.format(newCaseTemplate, dbSchema, questionAugmented, questionAugmented); + + return instruction + InputFormat.SEPERATOR + exampleFormat + InputFormat.SEPERATOR + newCasePrompt; + } + + private String generateSqlPrompt(LLMReq llmReq, String schemaLinkStr, + List> fewshotExampleList) { + String instruction = "# Use the the schema links to generate the SQL queries for each of the questions."; + List exampleKeys = Arrays.asList("questionAugmented", "dbSchema", "generatedSchemaLinkings", "sql"); + String exampleTemplate = "dbSchema\nQ: questionAugmented\n" + "Schema_links: generatedSchemaLinkings\n" + + "SQL: sql"; + + String schemaLinkingPrompt = InputFormat.format(exampleTemplate, exampleKeys, fewshotExampleList); + Pair questionPrompt = promptHelper.transformQuestionPrompt(llmReq); + String dbSchema = questionPrompt.getLeft(); + String questionAugmented = questionPrompt.getRight(); + String newCaseTemplate = "%s\nQ: %s\nSchema_links: %s\nSQL: "; + String newCasePrompt = String.format(newCaseTemplate, dbSchema, questionAugmented, schemaLinkStr); + return instruction + InputFormat.SEPERATOR + schemaLinkingPrompt + InputFormat.SEPERATOR + newCasePrompt; + } + @Override public void afterPropertiesSet() { SqlGenStrategyFactory.addSqlGenerationForFactory(SqlGenType.TWO_PASS_AUTO_COT_SELF_CONSISTENCY, this); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/llm/s2sql/LLMReq.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/llm/s2sql/LLMReq.java index 58456ba87..c79f74c6c 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/llm/s2sql/LLMReq.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/llm/s2sql/LLMReq.java @@ -68,7 +68,7 @@ public class LLMReq { } public enum SqlGenType { - ONE_PASS_AUTO_COT_SELF_CONSISTENCY("1_pass_auto_cot_self_consistency"), + ONE_PASS_SELF_CONSISTENCY("1_pass_self_consistency"), TWO_PASS_AUTO_COT_SELF_CONSISTENCY("2_pass_auto_cot_self_consistency"); private String name; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/config/ParserConfig.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/config/ParserConfig.java index 20ea2b39a..24ef726fb 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/config/ParserConfig.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/config/ParserConfig.java @@ -13,12 +13,12 @@ import java.util.List; public class ParserConfig extends ParameterConfig { public static final Parameter PARSER_STRATEGY_TYPE = - new Parameter("s2.parser.strategy", "ONE_PASS_AUTO_COT_SELF_CONSISTENCY", + new Parameter("s2.parser.strategy", "ONE_PASS_SELF_CONSISTENCY", "LLM解析生成S2SQL策略", - "ONE_PASS_AUTO_COT_SELF_CONSISTENCY: 通过思维链且投票方式一步生成sql" + "ONE_PASS_SELF_CONSISTENCY: 通过投票方式一步生成sql" + "\nTWO_PASS_AUTO_COT_SELF_CONSISTENCY: 通过思维链且投票方式两步生成sql", "list", "Parser相关配置", Lists.newArrayList( - "ONE_PASS_AUTO_COT_SELF_CONSISTENCY", "TWO_PASS_AUTO_COT_SELF_CONSISTENCY")); + "ONE_PASS_SELF_CONSISTENCY", "TWO_PASS_AUTO_COT_SELF_CONSISTENCY")); public static final Parameter PARSER_LINKING_VALUE_ENABLE = new Parameter("s2.parser.linking.value.enable", "true", @@ -48,7 +48,7 @@ public class ParserConfig extends ParameterConfig { "number", "Parser相关配置"); public static final Parameter PARSER_FEW_SHOT_NUMBER = - new Parameter("s2.parser.few-shot.number", "5", + new Parameter("s2.parser.few-shot.number", "3", "few-shot样例个数", "样例越多效果可能越好,但token消耗越大", "number", "Parser相关配置"); @@ -70,11 +70,7 @@ public class ParserConfig extends ParameterConfig { @Override public List getSysParameters() { return Lists.newArrayList( - PARSER_STRATEGY_TYPE, PARSER_LINKING_VALUE_ENABLE, - PARSER_TEXT_LENGTH_THRESHOLD, - PARSER_TEXT_LENGTH_THRESHOLD_SHORT, - PARSER_TEXT_LENGTH_THRESHOLD_LONG, PARSER_FEW_SHOT_NUMBER, PARSER_SELF_CONSISTENCY_NUMBER, PARSER_SHOW_COUNT diff --git a/headless/python/few_shot_example/s2sql_exemplar3_transformed.json b/headless/python/few_shot_example/s2sql_exemplar3_transformed.json index 829c0d1b0..708ac0b50 100644 --- a/headless/python/few_shot_example/s2sql_exemplar3_transformed.json +++ b/headless/python/few_shot_example/s2sql_exemplar3_transformed.json @@ -3,7 +3,7 @@ "question": "比较jackjchen和robinlee在内容库的访问次数", "questionAugmented": "比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 用户名, 访问次数 FROM 内容库产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )\", we are asked:\n\"的访问次数 \" so we need column = [访问次数]\n\"’用户名‘,\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [访问次数,用户名,jackjchen,robinlee]", "generatedSchemaLinkings": "[访问次数,用户名,jackjchen,robinlee]" @@ -12,7 +12,7 @@ "question": "内容库近12个月访问人数 按部门", "questionAugmented": "内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门, 数据日期, 访问人数 FROM 内容库产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [数据日期,部门,访问人数,12,month]", "generatedSchemaLinkings": "[数据日期,部门,访问人数,12,month]" @@ -21,7 +21,7 @@ "question": "内容库美术部、技术研发部的访问时长", "questionAugmented": "内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门, 访问时长 FROM 内容库产品 WHERE 部门 IN ('美术部', '技术研发部')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )\", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", "generatedSchemaLinkings": "[部门,访问时长,美术部,技术研发部]" @@ -30,7 +30,7 @@ "question": "近3天海田飞系MPPM结算播放份额", "questionAugmented": "近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )", "modelName": "严选", - "dbSchema": "Table: 严选, Columns = [\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 严选, Columns = [\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]", "sql": "SELECT 严选版权归属系, 结算播放份额 FROM 严选 WHERE 严选版权归属系 = '海田飞系' AND datediff('day', 数据日期, '2023-08-21') <= 3 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )\", we are asked:\n\"结算播放份额 \" so we need column = [结算播放份额]\n\"当前的日期是\" so we need column = [数据日期]\n\"严选版权归属系\" so we need column = [严选版权归属系]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [海田飞系,3,day]. So the Schema_links are:\nSchema_links: [结算播放份额,数据日期,严选版权归属系,海田飞系,3,day]", "generatedSchemaLinkings": "[结算播放份额,数据日期,严选版权归属系,海田飞系,3,day]" @@ -39,7 +39,7 @@ "question": "对比近7天翻唱版和纯音乐的歌曲播放量", "questionAugmented": "对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]", "sql": "SELECT 歌曲版本, 语种, 结算播放量 FROM 歌曲库 WHERE 歌曲版本 = '翻唱版' AND 语种 = '纯音乐' AND datediff('day', 数据日期, '2023-05-22') <= 7 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"’歌曲版本‘\" so we need column = [歌曲版本]\n\"语种‘,’ \" so we need column = [语种]\n\"曲播放量 (\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,翻唱版,纯音乐,day]. So the Schema_links are:\nSchema_links: [数据日期,歌曲版本,语种,结算播放量,7,翻唱版,纯音乐,day]", "generatedSchemaLinkings": "[数据日期,歌曲版本,语种,结算播放量,7,翻唱版,纯音乐,day]" @@ -48,7 +48,7 @@ "question": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数", "questionAugmented": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )", "modelName": "艺人库", - "dbSchema": "Table: 艺人库, Columns = [\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]", "sql": "SELECT 歌手名, 粉丝数 FROM 艺人库 WHERE 歌手名 IN ('陈拙悬', '孟梅琦', '赖媚韵')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )\", we are asked:\n\"歌手ID‘,\" so we need column = [歌手名]\n\"的粉丝数 (\" so we need column = [粉丝数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [陈拙悬,孟梅琦,赖媚韵]. So the Schema_links are:\nSchema_links: [歌手名,粉丝数,陈拙悬,孟梅琦,赖媚韵]", "generatedSchemaLinkings": "[歌手名,粉丝数,陈拙悬,孟梅琦,赖媚韵]" @@ -57,7 +57,7 @@ "question": "播放量大于1万的歌曲有多少", "questionAugmented": "播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 结算播放量 > 10000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"歌曲有多少 \" so we need column = [歌曲名]\n\"播放量大于1\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,10000]", "generatedSchemaLinkings": "[歌曲名,结算播放量,10000]" @@ -66,7 +66,7 @@ "question": "内容库访问时长小于1小时,且来自美术部的用户是哪些", "questionAugmented": "内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 用户名 FROM 内容库产品 WHERE 部门 = '美术部' AND 访问时长 < 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\n\"术部的用户是\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [部门,访问时长,用户名,美术部,1]", "generatedSchemaLinkings": "[部门,访问时长,用户名,美术部,1]" @@ -75,7 +75,7 @@ "question": "内容库pv最高的用户有哪些", "questionAugmented": "内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 用户名 FROM 内容库产品 ORDER BY 访问次数 DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"最高的用户有\" so we need column = [访问次数]\n\"用户有哪些 (\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [访问次数,用户名,1]", "generatedSchemaLinkings": "[访问次数,用户名,1]" @@ -84,7 +84,7 @@ "question": "近90天袁亚伟播放量平均值是多少", "questionAugmented": "近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )", "modelName": "艺人库", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT AVG(结算播放量) FROM 艺人库 WHERE 歌手名 = '袁亚伟' AND datediff('day', 数据日期, '2023-08-31') <= 90 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\n\"播放量平均值\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [袁亚伟,90,day]. So the Schema_links are:\nSchema_links: [歌手名,数据日期,结算播放量,袁亚伟,90,day]", "generatedSchemaLinkings": "[歌手名,数据日期,结算播放量,袁亚伟,90,day]" @@ -93,7 +93,7 @@ "question": "周倩倩近7天结算播放量总和是多少", "questionAugmented": "周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )", "modelName": "艺人库", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT SUM(结算播放量) FROM 艺人库 WHERE 歌手名 = '周倩倩' AND datediff('day', 数据日期, '2023-08-31') <= 7 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\n\"结算播放量总\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [周倩倩,7,day]. So the Schema_links are:\nSchema_links: [歌手名,数据日期,结算播放量,周倩倩,7,day]", "generatedSchemaLinkings": "[歌手名,数据日期,结算播放量,周倩倩,7,day]" @@ -102,7 +102,7 @@ "question": "内容库访问次数大于1k的部门是哪些", "questionAugmented": "内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门 FROM 内容库产品 WHERE 访问次数 > 1000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )\", we are asked:\n\"部门是哪些 \" so we need column = [部门]\n\"访问次数大于\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [部门,访问次数,1000]", "generatedSchemaLinkings": "[部门,访问次数,1000]" @@ -111,7 +111,7 @@ "question": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些", "questionAugmented": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 结算播放量 > 20000 AND 歌手名 = '陈亿训' AND 歌曲名 = '孤勇者'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲ID‘。\" so we need column = [歌曲名]\n\"的所有的播放量\" so we need column = [结算播放量]\n\"歌手ID‘,\" so we need column = [歌手名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [孤勇者,20000,陈亿训]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,孤勇者,20000,陈亿训]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,孤勇者,20000,陈亿训]" @@ -120,7 +120,7 @@ "question": "周洁轮去年发布的歌曲有哪些", "questionAugmented": "周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发布时间, '2023-09-18') <= 1 AND 歌手名 = '周洁轮'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"发布的歌曲有\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,周洁轮,year]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,1,周洁轮,year]", "generatedSchemaLinkings": "[歌曲名,歌手名,发布时间,1,周洁轮,year]" @@ -129,7 +129,7 @@ "question": "我想要近半年签约的播放量前十的歌手有哪些", "questionAugmented": "我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )", "modelName": "艺人库", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT 歌手名 FROM 艺人库 WHERE datediff('year', 签约日期, '2023-09-11') <= 0.5 ORDER BY 结算播放量 DESC LIMIT 10", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )\", we are asked:\n\"签约的播放量\" so we need column = [签约日期]\n\"歌手有哪些 \" so we need column = [歌手名]\n\"签约的播放量\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10,0.5,year]. So the Schema_links are:\nSchema_links: [签约日期,歌手名,结算播放量,10,0.5,year]", "generatedSchemaLinkings": "[签约日期,歌手名,结算播放量,10,0.5,year]" @@ -138,7 +138,7 @@ "question": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发行日期, '2023-08-12') <= 1 AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"天播放超过一\" so we need column = [结算播放量]\n\"最近一年发行\" so we need column = [发行日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,1,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,发行日期,10000000,1,7,year,day]", "generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,发行日期,10000000,1,7,year,day]" @@ -147,7 +147,7 @@ "question": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发行日期, '2023-08-12') <= 0 AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,0,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,发行日期,10000000,0,7,year,day]", "generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,发行日期,10000000,0,7,year,day]" @@ -156,7 +156,7 @@ "question": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发行日期 >= '2023-01-01' AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,2023-01-01,7,day]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,发行日期,10000000,2023-01-01,7,day]", "generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,发行日期,10000000,2023-01-01,7,day]" @@ -165,7 +165,7 @@ "question": "周洁轮2023年6月之后发布的歌曲有哪些", "questionAugmented": "周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2023-06-01' AND 歌手名 = '周洁轮'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2023-06-01,周洁轮]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,2023-06-01,周洁轮]", "generatedSchemaLinkings": "[歌曲名,歌手名,发布时间,2023-06-01,周洁轮]" @@ -174,7 +174,7 @@ "question": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?", "questionAugmented": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2023-01-05' AND 歌手名 = '邓梓琦' AND 结算播放量 > 5000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"中,有哪些播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [5000000,2023-01-05,邓梓琦]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,5000000,2023-01-05,邓梓琦]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,5000000,2023-01-05,邓梓琦]" @@ -183,7 +183,7 @@ "question": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些?", "questionAugmented": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 数据日期 >= '2023-06-01' AND 歌手名 = '张亮英' AND 结算播放量 > 2000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )\", we are asked:\n\"的歌曲有哪些? (\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"后,张亮英播放量大\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2000000,2023-06-01,张亮英]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,歌手名,2000000,2023-06-01,张亮英]", "generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,歌手名,2000000,2023-06-01,张亮英]" @@ -192,7 +192,7 @@ "question": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些", "questionAugmented": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2021-06-01' AND 歌手名 = '李雨纯' AND 结算播放量 > 200000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"的播放量大于\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月以后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,2021-06-01,李雨纯]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,2021-06-01,李雨纯]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,2021-06-01,李雨纯]" @@ -201,7 +201,7 @@ "question": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些", "questionAugmented": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '1992-04-02' AND 发布时间 <= '2020-05-02' AND 歌手名 = '刘锝桦' AND 结算播放量 > 200000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"发布的播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之间发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,刘锝桦,1992-04-02,2020-05-02]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]" @@ -210,7 +210,7 @@ "question": "内容库近30天访问次数的平均数", "questionAugmented": "内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT AVG(访问次数) FROM 内容库产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数的平均数\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,30,day]", "generatedSchemaLinkings": "[数据日期,访问次数,30,day]" @@ -219,7 +219,7 @@ "question": "内容库近半年哪个月的访问次数汇总最高", "questionAugmented": "内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"的访问次数汇总\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,1,0.5,year]", "generatedSchemaLinkings": "[数据日期,访问次数,1,0.5,year]" @@ -228,7 +228,7 @@ "question": "内容库近半年每个月的平均访问次数", "questionAugmented": "内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数 (\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,0.5,year]", "generatedSchemaLinkings": "[数据日期,访问次数,0.5,year]" @@ -237,7 +237,7 @@ "question": "内容库 按部门统计访问次数 top10 的部门", "questionAugmented": "内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 部门, SUM(访问次数) FROM 内容库产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\" 的部门 (\" so we need column = [部门]\n\"计访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [部门,访问次数,10]", "generatedSchemaLinkings": "[部门,访问次数,10]" @@ -246,7 +246,7 @@ "question": "超音速 近7个月,月度总访问量超过 2万的月份", "questionAugmented": "超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )", "modelName": "内容库产品", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期) FROM 内容库产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问量超过 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,7,20000,month]", "generatedSchemaLinkings": "[数据日期,访问次数,7,20000,month]" @@ -255,7 +255,7 @@ "question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量", "questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(运营播放量) FROM (SELECT 数据日期, 运营播放量 FROM 歌曲库 WHERE 发布日期 >= '2022-07-01' AND 发布日期 <= '2023-07-01' ORDER BY 播放量 DESC LIMIT 100) t WHERE datediff('year', 数据日期, '2023-09-10') <= 1 GROUP BY MONTH(数据日期)", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"运营播放量 \" so we need column = [播放量]\n\"当前的日期是\" so we need column = [数据日期]\n\"月之间发布到\" so we need column = [发布日期]\n\"运营播放量 \" so we need column = [运营播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [播放量,数据日期,发布日期,运营播放量,1,year,100,2022-07-01,2023-07-01]", "generatedSchemaLinkings": "[播放量,数据日期,发布日期,运营播放量,1,year,100,2022-07-01,2023-07-01]" @@ -264,7 +264,7 @@ "question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份", "questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )", "modelName": "歌曲库", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(运营播放量) FROM (SELECT 数据日期, 运营播放量 FROM 歌曲库 WHERE 发布日期 >= '2022-07-01' AND 发布日期 <= '2023-07-01' ORDER BY 播放量 DESC LIMIT 100) t WHERE datediff('year', 数据日期, '2023-09-10') <= 1 GROUP BY MONTH(数据日期) HAVING SUM(运营播放量) > 2000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"播放量之和,\" so we need column = [播放量]\n\"当前的日期是\" so we need column = [数据日期]\n\"月之间发布到\" so we need column = [发布日期]\n\"运营播放量之\" so we need column = [运营播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2000,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [播放量,数据日期,发布日期,运营播放量,1,2000,year,100,2022-07-01,2023-07-01]", "generatedSchemaLinkings": "[播放量,数据日期,发布日期,运营播放量,1,2000,year,100,2022-07-01,2023-07-01]" @@ -273,7 +273,7 @@ "question": "今年智能机在哪个国家的销量之和最高", "questionAugmented": "今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )", "modelName": "营销月模型", - "dbSchema": "Table: 营销月模型, Columns = [\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 营销月模型, Columns = [\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]", "sql": "SELECT 国家中文名, SUM(销量) FROM 营销月模型 WHERE 机型类别 = '智能机' AND 数据日期 >= '2023-01-01' AND 数据日期 <= '2023-11-01' GROUP BY 国家中文名 ORDER BY SUM(销量) DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )\", we are asked:\n\"’机型类别‘\" so we need column = [机型类别]\n\"当前的日期是\" so we need column = [数据日期]\n\"国家的销量之和\" so we need column = [国家中文名]\n\"个国家的销量\" so we need column = [销量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2023-11-01,智能机,2023-01-01]. So the Schema_links are:\nSchema_links: [机型类别,数据日期,国家中文名,销量,1,2023-11-01,智能机,2023-01-01]", "generatedSchemaLinkings": "[机型类别,数据日期,国家中文名,销量,1,2023-11-01,智能机,2023-01-01]" diff --git a/launchers/standalone/src/main/resources/application-local.yaml b/launchers/standalone/src/main/resources/application-local.yaml index 68927b5dd..24b18c38b 100644 --- a/launchers/standalone/src/main/resources/application-local.yaml +++ b/launchers/standalone/src/main/resources/application-local.yaml @@ -41,7 +41,7 @@ s2: parser: url: ${s2.pyllm.url} - strategy: TWO_PASS_AUTO_COT_SELF_CONSISTENCY + strategy: ONE_PASS_SELF_CONSISTENCY exemplar-recall: number: 10 few-shot: diff --git a/launchers/standalone/src/main/resources/s2ql_exemplar.json b/launchers/standalone/src/main/resources/s2ql_exemplar.json index 7cb78b1ea..203c215b0 100644 --- a/launchers/standalone/src/main/resources/s2ql_exemplar.json +++ b/launchers/standalone/src/main/resources/s2ql_exemplar.json @@ -2,7 +2,7 @@ { "question": "比较jackjchen和robinlee在内容库的访问次数", "questionAugmented": "比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 用户名, 访问次数 FROM 内容库产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )\", we are asked:\n\"’用户名‘,\" so we need column = [用户名]\n\"的访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [用户名,访问次数,jackjchen,robinlee]", "generatedSchemaLinkings": "[用户名,访问次数,jackjchen,robinlee]" @@ -10,7 +10,7 @@ { "question": "内容库近12个月访问人数 按部门", "questionAugmented": "内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门, 数据日期, 访问人数 FROM 内容库产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )\", we are asked:\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [部门,访问人数,数据日期,12,month]", "generatedSchemaLinkings": "[部门,访问人数,数据日期,12,month]" @@ -18,7 +18,7 @@ { "question": "内容库美术部、技术研发部的访问时长", "questionAugmented": "内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门, 访问时长 FROM 内容库产品 WHERE 部门 IN ('美术部', '技术研发部')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )\", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", "generatedSchemaLinkings": "[部门,访问时长,美术部,技术研发部]" @@ -26,7 +26,7 @@ { "question": "近3天海田飞系MPPM结算播放份额", "questionAugmented": "近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )", - "dbSchema": "Table: 严选, Columns = [\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 严选, Columns = [\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]", "sql": "SELECT 严选版权归属系, 结算播放份额 FROM 严选 WHERE 严选版权归属系 = '海田飞系' AND datediff('day', 数据日期, '2023-08-21') <= 3 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )\", we are asked:\n\"结算播放份额 \" so we need column = [结算播放份额]\n\"严选版权归属系\" so we need column = [严选版权归属系]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [海田飞系,3,day]. So the Schema_links are:\nSchema_links: [结算播放份额,严选版权归属系,数据日期,海田飞系,3,day]", "generatedSchemaLinkings": "[结算播放份额,严选版权归属系,数据日期,海田飞系,3,day]" @@ -34,7 +34,7 @@ { "question": "对比近7天翻唱版和纯音乐的歌曲播放量", "questionAugmented": "对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]", "sql": "SELECT 歌曲版本, 语种, 结算播放量 FROM 歌曲库 WHERE 歌曲版本 = '翻唱版' AND 语种 = '纯音乐' AND datediff('day', 数据日期, '2023-05-22') <= 7 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )\", we are asked:\n\"曲播放量 (\" so we need column = [结算播放量]\n\"’歌曲版本‘\" so we need column = [歌曲版本]\n\"语种‘,’ \" so we need column = [语种]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,翻唱版,纯音乐,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌曲版本,语种,数据日期,7,翻唱版,纯音乐,day]", "generatedSchemaLinkings": "[结算播放量,歌曲版本,语种,数据日期,7,翻唱版,纯音乐,day]" @@ -42,7 +42,7 @@ { "question": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数", "questionAugmented": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]", "sql": "SELECT 歌手名, 粉丝数 FROM 艺人库 WHERE 歌手名 IN ('陈拙悬', '孟梅琦', '赖媚韵')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )\", we are asked:\n\"歌手ID‘,\" so we need column = [歌手名]\n\"的粉丝数 (\" so we need column = [粉丝数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [陈拙悬,孟梅琦,赖媚韵]. So the Schema_links are:\nSchema_links: [歌手名,粉丝数,陈拙悬,孟梅琦,赖媚韵]", "generatedSchemaLinkings": "[歌手名,粉丝数,陈拙悬,孟梅琦,赖媚韵]" @@ -50,7 +50,7 @@ { "question": "播放量大于1万的歌曲有多少", "questionAugmented": "播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 结算播放量 > 10000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"歌曲有多少 \" so we need column = [歌曲名]\n\"播放量大于1\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,10000]", "generatedSchemaLinkings": "[歌曲名,结算播放量,10000]" @@ -58,7 +58,7 @@ { "question": "内容库访问时长小于1小时,且来自美术部的用户是哪些", "questionAugmented": "内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 用户名 FROM 内容库产品 WHERE 部门 = '美术部' AND 访问时长 < 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"术部的用户是\" so we need column = [用户名]\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [用户名,部门,访问时长,美术部,1]", "generatedSchemaLinkings": "[用户名,部门,访问时长,美术部,1]" @@ -66,7 +66,7 @@ { "question": "内容库pv最高的用户有哪些", "questionAugmented": "内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 用户名 FROM 内容库产品 ORDER BY 访问次数 DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"用户有哪些 (\" so we need column = [用户名]\n\"最高的用户有\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [用户名,访问次数,1]", "generatedSchemaLinkings": "[用户名,访问次数,1]" @@ -74,7 +74,7 @@ { "question": "近90天袁亚伟播放量平均值是多少", "questionAugmented": "近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT AVG(结算播放量) FROM 艺人库 WHERE 歌手名 = '袁亚伟' AND datediff('day', 数据日期, '2023-08-31') <= 90 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"播放量平均值\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [袁亚伟,90,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌手名,数据日期,袁亚伟,90,day]", "generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,袁亚伟,90,day]" @@ -82,7 +82,7 @@ { "question": "周倩倩近7天结算播放量总和是多少", "questionAugmented": "周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT SUM(结算播放量) FROM 艺人库 WHERE 歌手名 = '周倩倩' AND datediff('day', 数据日期, '2023-08-31') <= 7 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"结算播放量总\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [周倩倩,7,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌手名,数据日期,周倩倩,7,day]", "generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,周倩倩,7,day]" @@ -90,7 +90,7 @@ { "question": "内容库访问次数大于1k的部门是哪些", "questionAugmented": "内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门 FROM 内容库产品 WHERE 访问次数 > 1000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )\", we are asked:\n\"访问次数大于\" so we need column = [访问次数]\n\"部门是哪些 \" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [访问次数,部门,1000]", "generatedSchemaLinkings": "[访问次数,部门,1000]" @@ -98,7 +98,7 @@ { "question": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些", "questionAugmented": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 结算播放量 > 20000 AND 歌手名 = '陈亿训' AND 歌曲名 = '孤勇者'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲ID‘。\" so we need column = [歌曲名]\n\"的所有的播放量\" so we need column = [结算播放量]\n\"歌手ID‘,\" so we need column = [歌手名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [孤勇者,20000,陈亿训]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,孤勇者,20000,陈亿训]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,孤勇者,20000,陈亿训]" @@ -106,7 +106,7 @@ { "question": "周洁轮去年发布的歌曲有哪些", "questionAugmented": "周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发布时间, '2023-09-18') <= 1 AND 歌手名 = '周洁轮'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"发布的歌曲有\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,周洁轮,year]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,1,周洁轮,year]", "generatedSchemaLinkings": "[歌曲名,歌手名,发布时间,1,周洁轮,year]" @@ -114,7 +114,7 @@ { "question": "我想要近半年签约的播放量前十的歌手有哪些", "questionAugmented": "我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT 歌手名 FROM 艺人库 WHERE datediff('year', 签约日期, '2023-09-11') <= 0.5 ORDER BY 结算播放量 DESC LIMIT 10", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )\", we are asked:\n\"歌手有哪些 \" so we need column = [歌手名]\n\"签约的播放量\" so we need column = [结算播放量]\n\"签约的播放量\" so we need column = [签约日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10,0.5,year]. So the Schema_links are:\nSchema_links: [歌手名,结算播放量,签约日期,10,0.5,year]", "generatedSchemaLinkings": "[歌手名,结算播放量,签约日期,10,0.5,year]" @@ -122,7 +122,7 @@ { "question": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发行日期, '2023-08-12') <= 1 AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"最近一年发行\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,1,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,1,7,year,day]", "generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,1,7,year,day]" @@ -130,7 +130,7 @@ { "question": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发行日期, '2023-08-12') <= 0 AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,0,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,0,7,year,day]", "generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,0,7,year,day]" @@ -138,7 +138,7 @@ { "question": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发行日期 >= '2023-01-01' AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,2023-01-01,7,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,2023-01-01,7,day]", "generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,2023-01-01,7,day]" @@ -146,7 +146,7 @@ { "question": "周洁轮2023年6月之后发布的歌曲有哪些", "questionAugmented": "周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2023-06-01' AND 歌手名 = '周洁轮'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2023-06-01,周洁轮]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,2023-06-01,周洁轮]", "generatedSchemaLinkings": "[歌曲名,歌手名,发布时间,2023-06-01,周洁轮]" @@ -154,7 +154,7 @@ { "question": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?", "questionAugmented": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2023-01-05' AND 歌手名 = '邓梓琦' AND 结算播放量 > 5000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"中,有哪些播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [5000000,2023-01-05,邓梓琦]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,5000000,2023-01-05,邓梓琦]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,5000000,2023-01-05,邓梓琦]" @@ -162,7 +162,7 @@ { "question": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些?", "questionAugmented": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 数据日期 >= '2023-06-01' AND 歌手名 = '张亮英' AND 结算播放量 > 2000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )\", we are asked:\n\"的歌曲有哪些? (\" so we need column = [歌曲名]\n\"后,张亮英播放量大\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2000000,2023-06-01,张亮英]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,数据日期,2000000,2023-06-01,张亮英]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,数据日期,2000000,2023-06-01,张亮英]" @@ -170,7 +170,7 @@ { "question": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些", "questionAugmented": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2021-06-01' AND 歌手名 = '李雨纯' AND 结算播放量 > 200000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"的播放量大于\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月以后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,2021-06-01,李雨纯]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,2021-06-01,李雨纯]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,2021-06-01,李雨纯]" @@ -178,7 +178,7 @@ { "question": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些", "questionAugmented": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '1992-04-02' AND 发布时间 <= '2020-05-02' AND 歌手名 = '刘锝桦' AND 结算播放量 > 200000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"发布的播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之间发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,刘锝桦,1992-04-02,2020-05-02]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]" @@ -186,7 +186,7 @@ { "question": "内容库近30天访问次数的平均数", "questionAugmented": "内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT AVG(访问次数) FROM 内容库产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"访问次数的平均数\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,30,day]", "generatedSchemaLinkings": "[访问次数,数据日期,30,day]" @@ -194,7 +194,7 @@ { "question": "内容库近半年哪个月的访问次数汇总最高", "questionAugmented": "内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"的访问次数汇总\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,1,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,1,0.5,year]" @@ -202,7 +202,7 @@ { "question": "内容库近半年每个月的平均访问次数", "questionAugmented": "内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"访问次数 (\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,0.5,year]" @@ -210,7 +210,7 @@ { "question": "内容库 按部门统计访问次数 top10 的部门", "questionAugmented": "内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 部门, SUM(访问次数) FROM 内容库产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"计访问次数 \" so we need column = [访问次数]\n\" 的部门 (\" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [访问次数,部门,10]", "generatedSchemaLinkings": "[访问次数,部门,10]" @@ -218,7 +218,7 @@ { "question": "超音速 近7个月,月度总访问量超过 2万的月份", "questionAugmented": "超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期) FROM 内容库产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"访问量超过 \" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,7,20000,month]", "generatedSchemaLinkings": "[访问次数,数据日期,7,20000,month]" @@ -226,7 +226,7 @@ { "question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量", "questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(运营播放量) FROM (SELECT 数据日期, 运营播放量 FROM 歌曲库 WHERE 发布日期 >= '2022-07-01' AND 发布日期 <= '2023-07-01' ORDER BY 播放量 DESC LIMIT 100) t WHERE datediff('year', 数据日期, '2023-09-10') <= 1 GROUP BY MONTH(数据日期)", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"月之间发布到\" so we need column = [发布日期]\n\"运营播放量 \" so we need column = [播放量]\n\"运营播放量 \" so we need column = [运营播放量]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [发布日期,播放量,运营播放量,数据日期,1,year,100,2022-07-01,2023-07-01]", "generatedSchemaLinkings": "[发布日期,播放量,运营播放量,数据日期,1,year,100,2022-07-01,2023-07-01]" @@ -234,7 +234,7 @@ { "question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份", "questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(运营播放量) FROM (SELECT 数据日期, 运营播放量 FROM 歌曲库 WHERE 发布日期 >= '2022-07-01' AND 发布日期 <= '2023-07-01' ORDER BY 播放量 DESC LIMIT 100) t WHERE datediff('year', 数据日期, '2023-09-10') <= 1 GROUP BY MONTH(数据日期) HAVING SUM(运营播放量) > 2000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"月之间发布到\" so we need column = [发布日期]\n\"播放量之和,\" so we need column = [播放量]\n\"运营播放量之\" so we need column = [运营播放量]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2000,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [发布日期,播放量,运营播放量,数据日期,1,2000,year,100,2022-07-01,2023-07-01]", "generatedSchemaLinkings": "[发布日期,播放量,运营播放量,数据日期,1,2000,year,100,2022-07-01,2023-07-01]" @@ -242,7 +242,7 @@ { "question": "今年智能机在哪个国家的销量之和最高", "questionAugmented": "今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )", - "dbSchema": "Table: 营销月模型, Columns = [\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 营销月模型, Columns = [\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]", "sql": "SELECT 国家中文名, SUM(销量) FROM 营销月模型 WHERE 机型类别 = '智能机' AND 数据日期 >= '2023-01-01' AND 数据日期 <= '2023-11-01' GROUP BY 国家中文名 ORDER BY sum(销量) DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )\", we are asked:\n\"国家的销量之和\" so we need column = [国家中文名]\n\"个国家的销量\" so we need column = [销量]\n\"’机型类别‘\" so we need column = [机型类别]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2023-11-01,智能机,2023-01-01]. So the Schema_links are:\nSchema_links: [国家中文名,销量,机型类别,数据日期,1,2023-11-01,智能机,2023-01-01]", "generatedSchemaLinkings": "[国家中文名,销量,机型类别,数据日期,1,2023-11-01,智能机,2023-01-01]" diff --git a/launchers/standalone/src/test/resources/application-local.yaml b/launchers/standalone/src/test/resources/application-local.yaml index 8aeb28bdc..cd0ef6d3e 100644 --- a/launchers/standalone/src/test/resources/application-local.yaml +++ b/launchers/standalone/src/test/resources/application-local.yaml @@ -37,7 +37,7 @@ logging: s2: parser: - strategy: ONE_PASS_AUTO_COT_SELF_CONSISTENCY + strategy: ONE_PASS_SELF_CONSISTENCY exemplar-recall: number: 5 few-shot: diff --git a/launchers/standalone/src/test/resources/s2ql_exemplar.json b/launchers/standalone/src/test/resources/s2ql_exemplar.json index 7cb78b1ea..203c215b0 100644 --- a/launchers/standalone/src/test/resources/s2ql_exemplar.json +++ b/launchers/standalone/src/test/resources/s2ql_exemplar.json @@ -2,7 +2,7 @@ { "question": "比较jackjchen和robinlee在内容库的访问次数", "questionAugmented": "比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 用户名, 访问次数 FROM 内容库产品 WHERE 用户名 IN ('jackjchen', 'robinlee')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )\", we are asked:\n\"’用户名‘,\" so we need column = [用户名]\n\"的访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [用户名,访问次数,jackjchen,robinlee]", "generatedSchemaLinkings": "[用户名,访问次数,jackjchen,robinlee]" @@ -10,7 +10,7 @@ { "question": "内容库近12个月访问人数 按部门", "questionAugmented": "内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门, 数据日期, 访问人数 FROM 内容库产品 WHERE datediff('month', 数据日期, '2022-11-06') <= 12 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )\", we are asked:\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [部门,访问人数,数据日期,12,month]", "generatedSchemaLinkings": "[部门,访问人数,数据日期,12,month]" @@ -18,7 +18,7 @@ { "question": "内容库美术部、技术研发部的访问时长", "questionAugmented": "内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门, 访问时长 FROM 内容库产品 WHERE 部门 IN ('美术部', '技术研发部')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )\", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]", "generatedSchemaLinkings": "[部门,访问时长,美术部,技术研发部]" @@ -26,7 +26,7 @@ { "question": "近3天海田飞系MPPM结算播放份额", "questionAugmented": "近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )", - "dbSchema": "Table: 严选, Columns = [\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 严选, Columns = [\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]", "sql": "SELECT 严选版权归属系, 结算播放份额 FROM 严选 WHERE 严选版权归属系 = '海田飞系' AND datediff('day', 数据日期, '2023-08-21') <= 3 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )\", we are asked:\n\"结算播放份额 \" so we need column = [结算播放份额]\n\"严选版权归属系\" so we need column = [严选版权归属系]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [海田飞系,3,day]. So the Schema_links are:\nSchema_links: [结算播放份额,严选版权归属系,数据日期,海田飞系,3,day]", "generatedSchemaLinkings": "[结算播放份额,严选版权归属系,数据日期,海田飞系,3,day]" @@ -34,7 +34,7 @@ { "question": "对比近7天翻唱版和纯音乐的歌曲播放量", "questionAugmented": "对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]", "sql": "SELECT 歌曲版本, 语种, 结算播放量 FROM 歌曲库 WHERE 歌曲版本 = '翻唱版' AND 语种 = '纯音乐' AND datediff('day', 数据日期, '2023-05-22') <= 7 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )\", we are asked:\n\"曲播放量 (\" so we need column = [结算播放量]\n\"’歌曲版本‘\" so we need column = [歌曲版本]\n\"语种‘,’ \" so we need column = [语种]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,翻唱版,纯音乐,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌曲版本,语种,数据日期,7,翻唱版,纯音乐,day]", "generatedSchemaLinkings": "[结算播放量,歌曲版本,语种,数据日期,7,翻唱版,纯音乐,day]" @@ -42,7 +42,7 @@ { "question": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数", "questionAugmented": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]", "sql": "SELECT 歌手名, 粉丝数 FROM 艺人库 WHERE 歌手名 IN ('陈拙悬', '孟梅琦', '赖媚韵')", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )\", we are asked:\n\"歌手ID‘,\" so we need column = [歌手名]\n\"的粉丝数 (\" so we need column = [粉丝数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [陈拙悬,孟梅琦,赖媚韵]. So the Schema_links are:\nSchema_links: [歌手名,粉丝数,陈拙悬,孟梅琦,赖媚韵]", "generatedSchemaLinkings": "[歌手名,粉丝数,陈拙悬,孟梅琦,赖媚韵]" @@ -50,7 +50,7 @@ { "question": "播放量大于1万的歌曲有多少", "questionAugmented": "播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 结算播放量 > 10000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"歌曲有多少 \" so we need column = [歌曲名]\n\"播放量大于1\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,10000]", "generatedSchemaLinkings": "[歌曲名,结算播放量,10000]" @@ -58,7 +58,7 @@ { "question": "内容库访问时长小于1小时,且来自美术部的用户是哪些", "questionAugmented": "内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 用户名 FROM 内容库产品 WHERE 部门 = '美术部' AND 访问时长 < 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"术部的用户是\" so we need column = [用户名]\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [用户名,部门,访问时长,美术部,1]", "generatedSchemaLinkings": "[用户名,部门,访问时长,美术部,1]" @@ -66,7 +66,7 @@ { "question": "内容库pv最高的用户有哪些", "questionAugmented": "内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 用户名 FROM 内容库产品 ORDER BY 访问次数 DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"用户有哪些 (\" so we need column = [用户名]\n\"最高的用户有\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [用户名,访问次数,1]", "generatedSchemaLinkings": "[用户名,访问次数,1]" @@ -74,7 +74,7 @@ { "question": "近90天袁亚伟播放量平均值是多少", "questionAugmented": "近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT AVG(结算播放量) FROM 艺人库 WHERE 歌手名 = '袁亚伟' AND datediff('day', 数据日期, '2023-08-31') <= 90 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"播放量平均值\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [袁亚伟,90,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌手名,数据日期,袁亚伟,90,day]", "generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,袁亚伟,90,day]" @@ -82,7 +82,7 @@ { "question": "周倩倩近7天结算播放量总和是多少", "questionAugmented": "周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT SUM(结算播放量) FROM 艺人库 WHERE 歌手名 = '周倩倩' AND datediff('day', 数据日期, '2023-08-31') <= 7 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"结算播放量总\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [周倩倩,7,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌手名,数据日期,周倩倩,7,day]", "generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,周倩倩,7,day]" @@ -90,7 +90,7 @@ { "question": "内容库访问次数大于1k的部门是哪些", "questionAugmented": "内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]", "sql": "SELECT 部门 FROM 内容库产品 WHERE 访问次数 > 1000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )\", we are asked:\n\"访问次数大于\" so we need column = [访问次数]\n\"部门是哪些 \" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [访问次数,部门,1000]", "generatedSchemaLinkings": "[访问次数,部门,1000]" @@ -98,7 +98,7 @@ { "question": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些", "questionAugmented": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 结算播放量 > 20000 AND 歌手名 = '陈亿训' AND 歌曲名 = '孤勇者'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲ID‘。\" so we need column = [歌曲名]\n\"的所有的播放量\" so we need column = [结算播放量]\n\"歌手ID‘,\" so we need column = [歌手名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [孤勇者,20000,陈亿训]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,孤勇者,20000,陈亿训]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,孤勇者,20000,陈亿训]" @@ -106,7 +106,7 @@ { "question": "周洁轮去年发布的歌曲有哪些", "questionAugmented": "周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发布时间, '2023-09-18') <= 1 AND 歌手名 = '周洁轮'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"发布的歌曲有\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,周洁轮,year]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,1,周洁轮,year]", "generatedSchemaLinkings": "[歌曲名,歌手名,发布时间,1,周洁轮,year]" @@ -114,7 +114,7 @@ { "question": "我想要近半年签约的播放量前十的歌手有哪些", "questionAugmented": "我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )", - "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]", "sql": "SELECT 歌手名 FROM 艺人库 WHERE datediff('year', 签约日期, '2023-09-11') <= 0.5 ORDER BY 结算播放量 DESC LIMIT 10", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )\", we are asked:\n\"歌手有哪些 \" so we need column = [歌手名]\n\"签约的播放量\" so we need column = [结算播放量]\n\"签约的播放量\" so we need column = [签约日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10,0.5,year]. So the Schema_links are:\nSchema_links: [歌手名,结算播放量,签约日期,10,0.5,year]", "generatedSchemaLinkings": "[歌手名,结算播放量,签约日期,10,0.5,year]" @@ -122,7 +122,7 @@ { "question": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发行日期, '2023-08-12') <= 1 AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"最近一年发行\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,1,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,1,7,year,day]", "generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,1,7,year,day]" @@ -130,7 +130,7 @@ { "question": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE datediff('year', 发行日期, '2023-08-12') <= 0 AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,0,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,0,7,year,day]", "generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,0,7,year,day]" @@ -138,7 +138,7 @@ { "question": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的", "questionAugmented": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发行日期 >= '2023-01-01' AND datediff('day', 数据日期, '2023-08-12') <= 7 AND 结算播放量 > 10000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,2023-01-01,7,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,2023-01-01,7,day]", "generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,2023-01-01,7,day]" @@ -146,7 +146,7 @@ { "question": "周洁轮2023年6月之后发布的歌曲有哪些", "questionAugmented": "周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2023-06-01' AND 歌手名 = '周洁轮'", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2023-06-01,周洁轮]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,2023-06-01,周洁轮]", "generatedSchemaLinkings": "[歌曲名,歌手名,发布时间,2023-06-01,周洁轮]" @@ -154,7 +154,7 @@ { "question": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?", "questionAugmented": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2023-01-05' AND 歌手名 = '邓梓琦' AND 结算播放量 > 5000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"中,有哪些播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [5000000,2023-01-05,邓梓琦]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,5000000,2023-01-05,邓梓琦]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,5000000,2023-01-05,邓梓琦]" @@ -162,7 +162,7 @@ { "question": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些?", "questionAugmented": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 数据日期 >= '2023-06-01' AND 歌手名 = '张亮英' AND 结算播放量 > 2000000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )\", we are asked:\n\"的歌曲有哪些? (\" so we need column = [歌曲名]\n\"后,张亮英播放量大\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2000000,2023-06-01,张亮英]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,数据日期,2000000,2023-06-01,张亮英]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,数据日期,2000000,2023-06-01,张亮英]" @@ -170,7 +170,7 @@ { "question": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些", "questionAugmented": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '2021-06-01' AND 歌手名 = '李雨纯' AND 结算播放量 > 200000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"的播放量大于\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月以后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,2021-06-01,李雨纯]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,2021-06-01,李雨纯]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,2021-06-01,李雨纯]" @@ -178,7 +178,7 @@ { "question": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些", "questionAugmented": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]", "sql": "SELECT 歌曲名 FROM 歌曲库 WHERE 发布时间 >= '1992-04-02' AND 发布时间 <= '2020-05-02' AND 歌手名 = '刘锝桦' AND 结算播放量 > 200000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"发布的播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之间发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,刘锝桦,1992-04-02,2020-05-02]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]", "generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]" @@ -186,7 +186,7 @@ { "question": "内容库近30天访问次数的平均数", "questionAugmented": "内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT AVG(访问次数) FROM 内容库产品 WHERE datediff('day', 数据日期, '2023-09-04') <= 30 ", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"访问次数的平均数\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,30,day]", "generatedSchemaLinkings": "[访问次数,数据日期,30,day]" @@ -194,7 +194,7 @@ { "question": "内容库近半年哪个月的访问次数汇总最高", "questionAugmented": "内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期) ORDER BY SUM(访问次数) DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"的访问次数汇总\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,1,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,1,0.5,year]" @@ -202,7 +202,7 @@ { "question": "内容库近半年每个月的平均访问次数", "questionAugmented": "内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), AVG(访问次数) FROM 内容库产品 WHERE datediff('year', 数据日期, '2023-09-04') <= 0.5 GROUP BY MONTH(数据日期)", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"访问次数 (\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,0.5,year]", "generatedSchemaLinkings": "[访问次数,数据日期,0.5,year]" @@ -210,7 +210,7 @@ { "question": "内容库 按部门统计访问次数 top10 的部门", "questionAugmented": "内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT 部门, SUM(访问次数) FROM 内容库产品 GROUP BY 部门 ORDER BY SUM(访问次数) DESC LIMIT 10", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"计访问次数 \" so we need column = [访问次数]\n\" 的部门 (\" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [访问次数,部门,10]", "generatedSchemaLinkings": "[访问次数,部门,10]" @@ -218,7 +218,7 @@ { "question": "超音速 近7个月,月度总访问量超过 2万的月份", "questionAugmented": "超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期) FROM 内容库产品 WHERE datediff('month', 数据日期, '2023-09-10') <= 7 GROUP BY MONTH(数据日期) HAVING SUM(访问次数) > 20000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"访问量超过 \" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,7,20000,month]", "generatedSchemaLinkings": "[访问次数,数据日期,7,20000,month]" @@ -226,7 +226,7 @@ { "question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量", "questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(运营播放量) FROM (SELECT 数据日期, 运营播放量 FROM 歌曲库 WHERE 发布日期 >= '2022-07-01' AND 发布日期 <= '2023-07-01' ORDER BY 播放量 DESC LIMIT 100) t WHERE datediff('year', 数据日期, '2023-09-10') <= 1 GROUP BY MONTH(数据日期)", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"月之间发布到\" so we need column = [发布日期]\n\"运营播放量 \" so we need column = [播放量]\n\"运营播放量 \" so we need column = [运营播放量]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [发布日期,播放量,运营播放量,数据日期,1,year,100,2022-07-01,2023-07-01]", "generatedSchemaLinkings": "[发布日期,播放量,运营播放量,数据日期,1,year,100,2022-07-01,2023-07-01]" @@ -234,7 +234,7 @@ { "question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份", "questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )", - "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]", "sql": "SELECT MONTH(数据日期), SUM(运营播放量) FROM (SELECT 数据日期, 运营播放量 FROM 歌曲库 WHERE 发布日期 >= '2022-07-01' AND 发布日期 <= '2023-07-01' ORDER BY 播放量 DESC LIMIT 100) t WHERE datediff('year', 数据日期, '2023-09-10') <= 1 GROUP BY MONTH(数据日期) HAVING SUM(运营播放量) > 2000", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"月之间发布到\" so we need column = [发布日期]\n\"播放量之和,\" so we need column = [播放量]\n\"运营播放量之\" so we need column = [运营播放量]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2000,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [发布日期,播放量,运营播放量,数据日期,1,2000,year,100,2022-07-01,2023-07-01]", "generatedSchemaLinkings": "[发布日期,播放量,运营播放量,数据日期,1,2000,year,100,2022-07-01,2023-07-01]" @@ -242,7 +242,7 @@ { "question": "今年智能机在哪个国家的销量之和最高", "questionAugmented": "今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )", - "dbSchema": "Table: 营销月模型, Columns = [\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]\nForeign_keys: []", + "dbSchema": "Table: 营销月模型, Columns = [\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]", "sql": "SELECT 国家中文名, SUM(销量) FROM 营销月模型 WHERE 机型类别 = '智能机' AND 数据日期 >= '2023-01-01' AND 数据日期 <= '2023-11-01' GROUP BY 国家中文名 ORDER BY sum(销量) DESC LIMIT 1", "generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )\", we are asked:\n\"国家的销量之和\" so we need column = [国家中文名]\n\"个国家的销量\" so we need column = [销量]\n\"’机型类别‘\" so we need column = [机型类别]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2023-11-01,智能机,2023-01-01]. So the Schema_links are:\nSchema_links: [国家中文名,销量,机型类别,数据日期,1,2023-11-01,智能机,2023-01-01]", "generatedSchemaLinkings": "[国家中文名,销量,机型类别,数据日期,1,2023-11-01,智能机,2023-01-01]"