diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java index aa3624536..2677e7bb0 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java @@ -127,6 +127,16 @@ public class DataSetSchema { return dimensions.stream().anyMatch(SchemaElement::containsPartitionTime); } + public SchemaElement getPartitionDimension() { + for (SchemaElement dimension : dimensions) { + String partitionTimeFormat = dimension.getPartitionTimeFormat(); + if (StringUtils.isNotBlank(partitionTimeFormat)) { + return dimension; + } + } + return null; + } + public String getPartitionTimeFormat() { for (SchemaElement dimension : dimensions) { String partitionTimeFormat = dimension.getPartitionTimeFormat(); diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/TimeCorrector.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/TimeCorrector.java index 115c417f2..41854ef05 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/TimeCorrector.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/corrector/TimeCorrector.java @@ -21,6 +21,7 @@ import org.springframework.util.CollectionUtils; import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Set; /** @@ -53,19 +54,24 @@ public class TimeCorrector extends BaseSemanticCorrector { String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectedS2SQL(); List whereFields = SqlSelectHelper.getWhereFields(correctS2SQL); Long dataSetId = semanticParseInfo.getDataSetId(); - - if (CollectionUtils.isEmpty(whereFields) || !TimeDimensionEnum.containsZhTimeDimension(whereFields)) { + DataSetSchema dataSetSchema = 
chatQueryContext.getSemanticSchema().getDataSetSchemaMap().get(dataSetId); + if (Objects.isNull(dataSetSchema) + || Objects.isNull(dataSetSchema.getPartitionDimension()) + || Objects.isNull(dataSetSchema.getPartitionDimension().getName())) { + return; + } + String partitionDimension = dataSetSchema.getPartitionDimension().getName(); + if (CollectionUtils.isEmpty(whereFields) || !whereFields.contains(partitionDimension)) { Pair startEndDate = S2SqlDateHelper.getStartEndDate(chatQueryContext, dataSetId, semanticParseInfo.getQueryType()); if (isValidDateRange(startEndDate)) { correctS2SQL = SqlAddHelper.addParenthesisToWhere(correctS2SQL); - String dateChName = TimeDimensionEnum.DAY.getChName(); String startDateLeft = startEndDate.getLeft(); String endDateRight = startEndDate.getRight(); String condExpr = String.format(" ( %s >= '%s' and %s <= '%s' )", - dateChName, startDateLeft, dateChName, endDateRight); + partitionDimension, startDateLeft, partitionDimension, endDateRight); correctS2SQL = addConditionToSQL(correctS2SQL, condExpr); } } diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/LLMRequestService.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/LLMRequestService.java index c65fb7e1e..eeedb8d66 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/LLMRequestService.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/parser/llm/LLMRequestService.java @@ -1,9 +1,8 @@ package com.tencent.supersonic.headless.chat.parser.llm; import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum; -import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum; -import com.tencent.supersonic.common.util.ContextUtils; import com.tencent.supersonic.common.util.DateUtils; +import com.tencent.supersonic.headless.api.pojo.DataSetSchema; import com.tencent.supersonic.headless.api.pojo.SchemaElement; import 
com.tencent.supersonic.headless.api.pojo.SchemaElementMatch; import com.tencent.supersonic.headless.api.pojo.SchemaElementType; @@ -61,43 +60,24 @@ public class LLMRequestService { } public LLMReq getLlmReq(ChatQueryContext queryCtx, Long dataSetId) { - LLMRequestService requestService = ContextUtils.getBean(LLMRequestService.class); - List linkingValues = requestService.getValues(queryCtx, dataSetId); - SemanticSchema semanticSchema = queryCtx.getSemanticSchema(); - Map dataSetIdToName = semanticSchema.getDataSetIdToName(); + List linkingValues = getValues(queryCtx, dataSetId); + Map dataSetIdToName = queryCtx.getSemanticSchema().getDataSetIdToName(); String queryText = queryCtx.getQueryText(); LLMReq llmReq = new LLMReq(); llmReq.setQueryText(queryText); - LLMReq.FilterCondition filterCondition = new LLMReq.FilterCondition(); - llmReq.setFilterCondition(filterCondition); - LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema(); llmSchema.setDataSetId(dataSetId); llmSchema.setDataSetName(dataSetIdToName.get(dataSetId)); - llmSchema.setDomainName(dataSetIdToName.get(dataSetId)); - - Set fieldNameList = getMatchedFieldNames(queryCtx, dataSetId); - if (Objects.nonNull(semanticSchema.getDataSetSchemaMap()) - && Objects.nonNull(semanticSchema.getDataSetSchemaMap().get(dataSetId))) { - TimeDefaultConfig timeDefaultConfig = semanticSchema.getDataSetSchemaMap() - .get(dataSetId).getTagTypeTimeDefaultConfig(); - if (!Objects.equals(timeDefaultConfig.getUnit(), -1) - && queryCtx.containsPartitionDimensions(dataSetId)) { - // 数据集配置了数据日期字段,并查询设置 时间不为-1时才添加 '数据日期' 字段 - fieldNameList.add(TimeDimensionEnum.DAY.getChName()); - } - } - llmSchema.setFieldNameList(new ArrayList<>(fieldNameList)); llmSchema.setMetrics(getMatchedMetrics(queryCtx, dataSetId)); llmSchema.setDimensions(getMatchedDimensions(queryCtx, dataSetId)); llmSchema.setTerms(getTerms(queryCtx, dataSetId)); llmReq.setSchema(llmSchema); - String priorExts = getPriorExts(queryCtx, new ArrayList<>(fieldNameList)); - 
llmReq.setPriorExts(priorExts); + String priorKnowledge = getPriorKnowledge(queryCtx, llmSchema); + llmReq.setPriorExts(priorKnowledge); List linking = new ArrayList<>(); boolean linkingValueEnabled = Boolean.valueOf(parserConfig.getParameterValue(PARSER_LINKING_VALUE_ENABLE)); @@ -143,12 +123,20 @@ public class LLMRequestService { }).collect(Collectors.toList()); } - private String getPriorExts(ChatQueryContext queryContext, List fieldNameList) { - StringBuilder extraInfoSb = new StringBuilder(); + private String getPriorKnowledge(ChatQueryContext queryContext, LLMReq.LLMSchema llmSchema) { + StringBuilder priorKnowledgeBuilder = new StringBuilder(); SemanticSchema semanticSchema = queryContext.getSemanticSchema(); - // 获取字段名到数据格式类型的映射 - Map fieldNameToDataFormatType = semanticSchema.getMetrics().stream() + appendMetricPriorKnowledge(llmSchema, priorKnowledgeBuilder, semanticSchema); + + // 处理维度字段 + appendDimensionPriorKnowledge(llmSchema, priorKnowledgeBuilder, semanticSchema); + + return priorKnowledgeBuilder.toString(); + } + + private Map getFieldNameToDataFormatTypeMap(SemanticSchema semanticSchema) { + return semanticSchema.getMetrics().stream() .filter(metric -> Objects.nonNull(metric.getDataFormatType())) .flatMap(metric -> { Set> fieldFormatPairs = new HashSet<>(); @@ -161,35 +149,48 @@ public class LLMRequestService { return fieldFormatPairs.stream(); }) .collect(Collectors.toMap(Pair::getLeft, Pair::getRight, (existing, replacement) -> existing)); + } - Map fieldNameToDateFormat = semanticSchema.getDimensions().stream() + private void appendMetricPriorKnowledge(LLMReq.LLMSchema llmSchema, + StringBuilder priorKnowledgeBuilder, + SemanticSchema semanticSchema) { + Map fieldNameToDataFormatType = getFieldNameToDataFormatTypeMap(semanticSchema); + + for (SchemaElement schemaElement : llmSchema.getMetrics()) { + String fieldName = schemaElement.getName(); + String dataFormatType = fieldNameToDataFormatType.get(fieldName); + if 
(DataFormatTypeEnum.DECIMAL.getName().equalsIgnoreCase(dataFormatType) + || DataFormatTypeEnum.PERCENT.getName().equalsIgnoreCase(dataFormatType)) { + priorKnowledgeBuilder.append(String.format("%s的计量单位是%s; ", fieldName, "小数")); + } + } + } + + private Map getFieldNameToDateFormatMap(SemanticSchema semanticSchema) { + return semanticSchema.getDimensions().stream() .filter(dimension -> StringUtils.isNotBlank(dimension.getTimeFormat())) .collect(Collectors.toMap( SchemaElement::getName, value -> Optional.ofNullable(value.getPartitionTimeFormat()).orElse(""), (k1, k2) -> k1) ); + } - // 构建额外信息字符串 - for (String fieldName : fieldNameList) { - String dataFormatType = fieldNameToDataFormatType.get(fieldName); - if (DataFormatTypeEnum.DECIMAL.getName().equalsIgnoreCase(dataFormatType) - || DataFormatTypeEnum.PERCENT.getName().equalsIgnoreCase(dataFormatType)) { - extraInfoSb.append(String.format("%s的计量单位是%s; ", fieldName, "小数")); - } - } - // 构建日期格式化信息 - for (String fieldName : fieldNameList) { + private void appendDimensionPriorKnowledge(LLMReq.LLMSchema llmSchema, + StringBuilder priorKnowledgeBuilder, + SemanticSchema semanticSchema) { + Map fieldNameToDateFormat = getFieldNameToDateFormatMap(semanticSchema); + + for (SchemaElement schemaElement : llmSchema.getDimensions()) { + String fieldName = schemaElement.getName(); String timeFormat = fieldNameToDateFormat.get(fieldName); if (StringUtils.isNotBlank(timeFormat)) { - extraInfoSb.append(String.format("%s的日期Format格式是%s; ", fieldName, timeFormat)); + priorKnowledgeBuilder.append(String.format("%s是分区时间且格式是%s; ", fieldName, timeFormat)); /* "; " terminates each hint so consecutive entries stay separated, as the metric hints do */ } } - return extraInfoSb.toString(); } public List getValues(ChatQueryContext queryCtx, Long dataSetId) { - Map itemIdToName = getItemIdToName(queryCtx, dataSetId); List matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId); if (CollectionUtils.isEmpty(matchedElements)) { return new ArrayList<>(); @@ -203,20 +204,13 @@ public class LLMRequestService { }) .map(elementMatch 
-> { LLMReq.ElementValue elementValue = new LLMReq.ElementValue(); - elementValue.setFieldName(itemIdToName.get(elementMatch.getElement().getId())); + elementValue.setFieldName(elementMatch.getElement().getName()); elementValue.setFieldValue(elementMatch.getWord()); return elementValue; }).collect(Collectors.toSet()); return new ArrayList<>(valueMatches); } - protected Map getItemIdToName(ChatQueryContext queryCtx, Long dataSetId) { - SemanticSchema semanticSchema = queryCtx.getSemanticSchema(); - List elements = semanticSchema.getDimensions(dataSetId); - return elements.stream().collect( - Collectors.toMap(SchemaElement::getId, SchemaElement::getName, (value1, value2) -> value2)); - } - protected List getMatchedMetrics(ChatQueryContext queryCtx, Long dataSetId) { List matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId); if (CollectionUtils.isEmpty(matchedElements)) { @@ -235,40 +229,40 @@ public class LLMRequestService { } protected List getMatchedDimensions(ChatQueryContext queryCtx, Long dataSetId) { - List matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId); - if (CollectionUtils.isEmpty(matchedElements)) { - return Collections.emptyList(); - } - return matchedElements.stream() - .filter(schemaElementMatch -> { - SchemaElementType elementType = schemaElementMatch.getElement().getType(); - return SchemaElementType.DIMENSION.equals(elementType); - }) - .map(schemaElementMatch -> schemaElementMatch.getElement()) - .collect(Collectors.toList()); - } - protected Set getMatchedFieldNames(ChatQueryContext queryCtx, Long dataSetId) { - Map itemIdToName = getItemIdToName(queryCtx, dataSetId); List matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId); - if (CollectionUtils.isEmpty(matchedElements)) { - return new HashSet<>(); + Set results = new HashSet<>(); + + if (!CollectionUtils.isEmpty(matchedElements)) { + results = matchedElements.stream() + .filter(element -> 
SchemaElementType.DIMENSION.equals(element.getElement().getType())) + .map(SchemaElementMatch::getElement) + .collect(Collectors.toSet()); } - return matchedElements.stream() - .filter(schemaElementMatch -> { - SchemaElementType elementType = schemaElementMatch.getElement().getType(); - return SchemaElementType.METRIC.equals(elementType) - || SchemaElementType.DIMENSION.equals(elementType) - || SchemaElementType.VALUE.equals(elementType); - }) - .map(schemaElementMatch -> { - SchemaElement element = schemaElementMatch.getElement(); - SchemaElementType elementType = element.getType(); - if (SchemaElementType.VALUE.equals(elementType)) { - return itemIdToName.get(element.getId()); - } - return schemaElementMatch.getWord(); - }) - .collect(Collectors.toSet()); + + SemanticSchema semanticSchema = queryCtx.getSemanticSchema(); + if (semanticSchema == null || semanticSchema.getDataSetSchemaMap() == null) { + return new ArrayList<>(results); + } + + DataSetSchema dataSetSchema = semanticSchema.getDataSetSchemaMap().get(dataSetId); + if (dataSetSchema == null) { + return new ArrayList<>(results); + } + + TimeDefaultConfig timeDefaultConfig = dataSetSchema.getTagTypeTimeDefaultConfig(); + SchemaElement partitionDimension = dataSetSchema.getPartitionDimension(); + + if (timeDefaultConfig == null || partitionDimension == null) { + return new ArrayList<>(results); + } + + if (Objects.equals(timeDefaultConfig.getUnit(), -1)) { + results.remove(partitionDimension); + } else { + results.add(partitionDimension); + } + + return new ArrayList<>(results); } } diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/query/llm/s2sql/LLMReq.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/query/llm/s2sql/LLMReq.java index 335cbe905..dddb03cd5 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/query/llm/s2sql/LLMReq.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/query/llm/s2sql/LLMReq.java 
@@ -7,81 +7,61 @@ import com.tencent.supersonic.common.pojo.ChatModelConfig; import com.tencent.supersonic.common.pojo.Text2SQLExemplar; import com.tencent.supersonic.headless.api.pojo.SchemaElement; import lombok.Data; +import org.apache.commons.collections4.CollectionUtils; +import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; @Data public class LLMReq { - private String queryText; - - private FilterCondition filterCondition; - private LLMSchema schema; - private List linking; - private String currentDate; - private String priorExts; - private SqlGenType sqlGenType; - private ChatModelConfig modelConfig; private PromptConfig promptConfig; - private List dynamicExemplars; - @Data public static class ElementValue { - private String fieldName; - private String fieldValue; } @Data public static class LLMSchema { - - private String domainName; - - private String dataSetName; - private Long dataSetId; - + private String dataSetName; private List fieldNameList; - private List metrics; - private List dimensions; - private List terms; - } - - @Data - public static class FilterCondition { - - private String tableName; + public List getFieldNameList() { /* rebuilt from metrics, then dimensions, on every call; the fieldNameList field is not read here */ + List<String> names = new ArrayList<>(); + if (CollectionUtils.isNotEmpty(metrics)) { + names.addAll(metrics.stream().map(SchemaElement::getName).collect(Collectors.toList())); + } + if (CollectionUtils.isNotEmpty(dimensions)) { + names.addAll(dimensions.stream().map(SchemaElement::getName).collect(Collectors.toList())); + } + return names; + } } @Data public static class Term { - private String name; - private String description; - private List alias = Lists.newArrayList(); - } public enum SqlGenType { - ONE_PASS_SELF_CONSISTENCY("1_pass_self_consistency"); - private String name; SqlGenType(String name) { diff --git a/headless/chat/src/test/java/com/tencent/supersonic/headless/chat/corrector/SchemaCorrectorTest.java 
b/headless/chat/src/test/java/com/tencent/supersonic/headless/chat/corrector/SchemaCorrectorTest.java index 636c24971..541e7957a 100644 --- a/headless/chat/src/test/java/com/tencent/supersonic/headless/chat/corrector/SchemaCorrectorTest.java +++ b/headless/chat/src/test/java/com/tencent/supersonic/headless/chat/corrector/SchemaCorrectorTest.java @@ -26,11 +26,7 @@ class SchemaCorrectorTest { + " \"dataSetId\": 1,\n" + " \"llmReq\": {\n" + " \"queryText\": \"xxx2024年播放量最高的十首歌\",\n" - + " \"filterCondition\": {\n" - + " \"tableName\": null\n" - + " },\n" + " \"schema\": {\n" - + " \"domainName\": \"歌曲\",\n" + " \"dataSetName\": \"歌曲\",\n" + " \"fieldNameList\": [\n" + " \"商务组\",\n" diff --git a/launchers/standalone/src/main/resources/s2-exemplar.json b/launchers/standalone/src/main/resources/s2-exemplar.json index d9afdf04a..46ba1bf52 100644 --- a/launchers/standalone/src/main/resources/s2-exemplar.json +++ b/launchers/standalone/src/main/resources/s2-exemplar.json @@ -1,50 +1,50 @@ [ { "question": "比较jackjchen和robinlee今年以来的访问次数", - "sideInfo": "CurrentDate=[2020-12-01],DomainTerms=[<核心用户 COMMENT '核心用户指tom和lucy'>]", - "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[], Values[<用户='jackjchen'>,<用户='robinlee'>]", + "sideInfo": "CurrentDate=[2020-12-01],DomainTerms=[<核心用户 COMMENT '核心用户指tom和lucy'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[<数据日期>], Values=[<用户='jackjchen'>,<用户='robinlee'>]", "sql": "SELECT 用户, 访问次数 FROM 超音数产品 WHERE 用户 IN ('jackjchen', 'robinlee') AND 数据日期 >= '2020-01-01' AND 数据日期 <= '2020-12-01'" }, { "question": "超音数近12个月访问人数 按部门", - "sideInfo": "CurrentDate=[2022-11-06]", - 
"dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[<部门>], Values=[]", + "sideInfo": "CurrentDate=[2022-11-06],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[<部门>,<数据日期>], Values=[]", "sql": "SELECT 部门, 数据日期, 访问人数 FROM 超音数产品 WHERE 数据日期 >= '2021-11-06' AND 数据日期 <= '2022-11-06'" }, { "question": "超音数过去90天美术部、技术研发部的访问时长", - "sideInfo": "CurrentDate=[2023-04-21]", - "dbSchema": "Table=[超音数产品], Metrics=[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions=[], Values=[<部门='美术部'>,<部门='技术研发部'>]", + "sideInfo": "CurrentDate=[2023-04-21],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions=[<数据日期>], Values=[<部门='美术部'>,<部门='技术研发部'>]", "sql": "SELECT 部门, 访问时长 FROM 超音数产品 WHERE 部门 IN ('美术部', '技术研发部') AND 数据日期 >= '2023-01-20' AND 数据日期 <= '2023-04-21'" }, { "question": "超音数访问时长小于1小时,且来自美术部的用户是哪些", - "sideInfo": "CurrentDate=[2023-07-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>]", - "dbSchema": "Table:[超音数产品], Metrics:[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions:[<用户>], Values:[<部门='美术部'>]", + "sideInfo": "CurrentDate=[2023-07-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions=[<用户>,<数据日期>], Values=[<部门='美术部'>]", "sql": "SELECT 用户 FROM 超音数产品 WHERE 部门 = '美术部' AND 访问时长 < 1" }, { "question": "超音数本月pv最高的用户有哪些", - "sideInfo": "CurrentDate=[2023-08-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>]", - "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT 
'一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<用户>], Values=[]", + "sideInfo": "CurrentDate=[2023-08-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<用户>,<数据日期>], Values=[]", "sql": "SELECT 用户 FROM 超音数产品 WHERE 数据日期 >= '2023-08-01' AND 数据日期 <= '2023-08-31' ORDER BY 访问次数 DESC LIMIT 1" }, { "question": "超音数访问次数大于1k的部门是哪些", - "sideInfo": "CurrentDate=[2023-09-14]", - "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>], Values=[]", + "sideInfo": "CurrentDate=[2023-09-14],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]", "sql": "SELECT 部门 FROM 超音数产品 WHERE 访问次数 > 1000" }, { "question": "过去半个月核心用户的访问次数", - "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]", - "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>], Values=[]", + "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]", "sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'" }, { "question": "过去半个月忠实用户有哪一些", - "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<忠实用户 COMMENT '一段时间内总访问次数大于100的用户'>]", - "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<用户>], Values=[]", + "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<忠实用户 COMMENT '一段时间内总访问次数大于100的用户'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]", + "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' 
AGGREGATE 'SUM'>], Dimensions=[<用户>,<数据日期>], Values=[]", "sql": "SELECT 用户 FROM 超音数产品 WHERE 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户 HAVING SUM(访问次数) > 100" } ] \ No newline at end of file