(improvement)(chat) Remove the special field 'data date' to avoid generating non-existent time fields. (#1596)

Author: lexluo09 (committed by GitHub)
Date: 2024-08-23 13:37:14 +08:00
Parent: afa82bf98d
Commit: be9a8bbc27
6 changed files with 127 additions and 141 deletions

View File

@@ -127,6 +127,16 @@ public class DataSetSchema {
         return dimensions.stream().anyMatch(SchemaElement::containsPartitionTime);
     }
 
+    public SchemaElement getPartitionDimension() {
+        for (SchemaElement dimension : dimensions) {
+            String partitionTimeFormat = dimension.getPartitionTimeFormat();
+            if (StringUtils.isNotBlank(partitionTimeFormat)) {
+                return dimension;
+            }
+        }
+        return null;
+    }
+
     public String getPartitionTimeFormat() {
         for (SchemaElement dimension : dimensions) {
             String partitionTimeFormat = dimension.getPartitionTimeFormat();

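The new accessor returns the first dimension that declares a partition-time format, or null when the data set has no partitioned time dimension. A minimal illustrative sketch of that lookup, using a hypothetical stand-in Dimension record rather than the project's real SchemaElement/DataSetSchema classes:

import java.util.List;

// Sketch only: stand-in types mirroring the lookup in DataSetSchema#getPartitionDimension.
class PartitionLookupDemo {

    record Dimension(String name, String partitionTimeFormat) {}

    // Returns the first dimension that declares a partition-time format, or null if none does.
    static Dimension getPartitionDimension(List<Dimension> dimensions) {
        for (Dimension dimension : dimensions) {
            String format = dimension.partitionTimeFormat();
            if (format != null && !format.isBlank()) {
                return dimension;
            }
        }
        return null;
    }

    public static void main(String[] args) {
        List<Dimension> dims = List.of(
                new Dimension("部门", null),
                new Dimension("数据日期", "yyyy-MM-dd"));
        System.out.println(getPartitionDimension(dims).name()); // prints 数据日期
    }
}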
View File

@@ -21,6 +21,7 @@ import org.springframework.util.CollectionUtils;
 
 import java.util.HashSet;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 
 /**
@@ -53,19 +54,24 @@ public class TimeCorrector extends BaseSemanticCorrector {
         String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectedS2SQL();
         List<String> whereFields = SqlSelectHelper.getWhereFields(correctS2SQL);
         Long dataSetId = semanticParseInfo.getDataSetId();
-        if (CollectionUtils.isEmpty(whereFields) || !TimeDimensionEnum.containsZhTimeDimension(whereFields)) {
+        DataSetSchema dataSetSchema = chatQueryContext.getSemanticSchema().getDataSetSchemaMap().get(dataSetId);
+        if (Objects.isNull(dataSetSchema)
+                || Objects.isNull(dataSetSchema.getPartitionDimension())
+                || Objects.isNull(dataSetSchema.getPartitionDimension().getName())) {
+            return;
+        }
+        String partitionDimension = dataSetSchema.getPartitionDimension().getName();
+        if (CollectionUtils.isEmpty(whereFields) || !whereFields.contains(partitionDimension)) {
             Pair<String, String> startEndDate = S2SqlDateHelper.getStartEndDate(chatQueryContext, dataSetId,
                     semanticParseInfo.getQueryType());
             if (isValidDateRange(startEndDate)) {
                 correctS2SQL = SqlAddHelper.addParenthesisToWhere(correctS2SQL);
-                String dateChName = TimeDimensionEnum.DAY.getChName();
                 String startDateLeft = startEndDate.getLeft();
                 String endDateRight = startEndDate.getRight();
                 String condExpr = String.format(" ( %s >= '%s' and %s <= '%s' )",
-                        dateChName, startDateLeft, dateChName, endDateRight);
+                        partitionDimension, startDateLeft, partitionDimension, endDateRight);
                 correctS2SQL = addConditionToSQL(correctS2SQL, condExpr);
             }
         }

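The corrector now builds the date filter from the data set's own partition dimension instead of the hard-coded '数据日期' (TimeDimensionEnum.DAY). A standalone sketch of the condition string that gets appended; the dimension name and dates are sample values, and the SQL-rewriting helpers themselves are not reproduced:

// Sketch: how the appended WHERE condition is formatted once the partition
// dimension name and the resolved date range are known.
public class TimeConditionDemo {
    public static void main(String[] args) {
        String partitionDimension = "数据日期"; // would come from DataSetSchema#getPartitionDimension
        String startDate = "2024-08-01";        // would come from S2SqlDateHelper (not shown here)
        String endDate = "2024-08-23";
        String condExpr = String.format(" ( %s >= '%s' and %s <= '%s' )",
                partitionDimension, startDate, partitionDimension, endDate);
        System.out.println(condExpr);
        // prints:  ( 数据日期 >= '2024-08-01' and 数据日期 <= '2024-08-23' )
    }
}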
View File

@@ -1,9 +1,8 @@
 package com.tencent.supersonic.headless.chat.parser.llm;
 
 import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum;
-import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
-import com.tencent.supersonic.common.util.ContextUtils;
 import com.tencent.supersonic.common.util.DateUtils;
+import com.tencent.supersonic.headless.api.pojo.DataSetSchema;
 import com.tencent.supersonic.headless.api.pojo.SchemaElement;
 import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
 import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
@@ -61,43 +60,24 @@ public class LLMRequestService {
     }
 
     public LLMReq getLlmReq(ChatQueryContext queryCtx, Long dataSetId) {
-        LLMRequestService requestService = ContextUtils.getBean(LLMRequestService.class);
-        List<LLMReq.ElementValue> linkingValues = requestService.getValues(queryCtx, dataSetId);
-        SemanticSchema semanticSchema = queryCtx.getSemanticSchema();
-        Map<Long, String> dataSetIdToName = semanticSchema.getDataSetIdToName();
+        List<LLMReq.ElementValue> linkingValues = getValues(queryCtx, dataSetId);
+        Map<Long, String> dataSetIdToName = queryCtx.getSemanticSchema().getDataSetIdToName();
         String queryText = queryCtx.getQueryText();
         LLMReq llmReq = new LLMReq();
         llmReq.setQueryText(queryText);
-        LLMReq.FilterCondition filterCondition = new LLMReq.FilterCondition();
-        llmReq.setFilterCondition(filterCondition);
         LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema();
         llmSchema.setDataSetId(dataSetId);
         llmSchema.setDataSetName(dataSetIdToName.get(dataSetId));
-        llmSchema.setDomainName(dataSetIdToName.get(dataSetId));
-        Set<String> fieldNameList = getMatchedFieldNames(queryCtx, dataSetId);
-        if (Objects.nonNull(semanticSchema.getDataSetSchemaMap())
-                && Objects.nonNull(semanticSchema.getDataSetSchemaMap().get(dataSetId))) {
-            TimeDefaultConfig timeDefaultConfig = semanticSchema.getDataSetSchemaMap()
-                    .get(dataSetId).getTagTypeTimeDefaultConfig();
-            if (!Objects.equals(timeDefaultConfig.getUnit(), -1)
-                    && queryCtx.containsPartitionDimensions(dataSetId)) {
-                // only add the '数据日期' field when the data set configures a data-date field
-                // and the default time unit is not -1
-                fieldNameList.add(TimeDimensionEnum.DAY.getChName());
-            }
-        }
-        llmSchema.setFieldNameList(new ArrayList<>(fieldNameList));
         llmSchema.setMetrics(getMatchedMetrics(queryCtx, dataSetId));
         llmSchema.setDimensions(getMatchedDimensions(queryCtx, dataSetId));
         llmSchema.setTerms(getTerms(queryCtx, dataSetId));
         llmReq.setSchema(llmSchema);
-        String priorExts = getPriorExts(queryCtx, new ArrayList<>(fieldNameList));
-        llmReq.setPriorExts(priorExts);
+        String priorKnowledge = getPriorKnowledge(queryCtx, llmSchema);
+        llmReq.setPriorExts(priorKnowledge);
         List<LLMReq.ElementValue> linking = new ArrayList<>();
         boolean linkingValueEnabled = Boolean.valueOf(parserConfig.getParameterValue(PARSER_LINKING_VALUE_ENABLE));
@@ -143,12 +123,20 @@ public class LLMRequestService {
                 }).collect(Collectors.toList());
     }
 
-    private String getPriorExts(ChatQueryContext queryContext, List<String> fieldNameList) {
-        StringBuilder extraInfoSb = new StringBuilder();
+    private String getPriorKnowledge(ChatQueryContext queryContext, LLMReq.LLMSchema llmSchema) {
+        StringBuilder priorKnowledgeBuilder = new StringBuilder();
         SemanticSchema semanticSchema = queryContext.getSemanticSchema();
-        // get the mapping from field name to data format type
-        Map<String, String> fieldNameToDataFormatType = semanticSchema.getMetrics().stream()
+        appendMetricPriorKnowledge(llmSchema, priorKnowledgeBuilder, semanticSchema);
+        // handle dimension fields
+        appendDimensionPriorKnowledge(llmSchema, priorKnowledgeBuilder, semanticSchema);
+        return priorKnowledgeBuilder.toString();
+    }
+
+    private Map<String, String> getFieldNameToDataFormatTypeMap(SemanticSchema semanticSchema) {
+        return semanticSchema.getMetrics().stream()
                 .filter(metric -> Objects.nonNull(metric.getDataFormatType()))
                 .flatMap(metric -> {
                     Set<Pair<String, String>> fieldFormatPairs = new HashSet<>();
@@ -161,35 +149,48 @@ public class LLMRequestService {
                     return fieldFormatPairs.stream();
                 })
                 .collect(Collectors.toMap(Pair::getLeft, Pair::getRight, (existing, replacement) -> existing));
+    }
 
-        Map<String, String> fieldNameToDateFormat = semanticSchema.getDimensions().stream()
+    private void appendMetricPriorKnowledge(LLMReq.LLMSchema llmSchema,
+                                            StringBuilder priorKnowledgeBuilder,
+                                            SemanticSchema semanticSchema) {
+        Map<String, String> fieldNameToDataFormatType = getFieldNameToDataFormatTypeMap(semanticSchema);
+        for (SchemaElement schemaElement : llmSchema.getMetrics()) {
+            String fieldName = schemaElement.getName();
+            String dataFormatType = fieldNameToDataFormatType.get(fieldName);
+            if (DataFormatTypeEnum.DECIMAL.getName().equalsIgnoreCase(dataFormatType)
+                    || DataFormatTypeEnum.PERCENT.getName().equalsIgnoreCase(dataFormatType)) {
+                priorKnowledgeBuilder.append(String.format("%s的计量单位是%s; ", fieldName, "小数"));
+            }
+        }
+    }
+
+    private Map<String, String> getFieldNameToDateFormatMap(SemanticSchema semanticSchema) {
+        return semanticSchema.getDimensions().stream()
                 .filter(dimension -> StringUtils.isNotBlank(dimension.getTimeFormat()))
                 .collect(Collectors.toMap(
                         SchemaElement::getName,
                         value -> Optional.ofNullable(value.getPartitionTimeFormat()).orElse(""),
                         (k1, k2) -> k1)
                 );
+    }
 
-        // build the extra info string
-        for (String fieldName : fieldNameList) {
-            String dataFormatType = fieldNameToDataFormatType.get(fieldName);
-            if (DataFormatTypeEnum.DECIMAL.getName().equalsIgnoreCase(dataFormatType)
-                    || DataFormatTypeEnum.PERCENT.getName().equalsIgnoreCase(dataFormatType)) {
-                extraInfoSb.append(String.format("%s的计量单位是%s; ", fieldName, "小数"));
-            }
-        }
-        // build the date format info
-        for (String fieldName : fieldNameList) {
+    private void appendDimensionPriorKnowledge(LLMReq.LLMSchema llmSchema,
+                                               StringBuilder priorKnowledgeBuilder,
+                                               SemanticSchema semanticSchema) {
+        Map<String, String> fieldNameToDateFormat = getFieldNameToDateFormatMap(semanticSchema);
+        for (SchemaElement schemaElement : llmSchema.getDimensions()) {
+            String fieldName = schemaElement.getName();
             String timeFormat = fieldNameToDateFormat.get(fieldName);
             if (StringUtils.isNotBlank(timeFormat)) {
-                extraInfoSb.append(String.format("%s的日期Format格式是%s; ", fieldName, timeFormat));
+                priorKnowledgeBuilder.append(String.format("%s是分区时间且格式是%s", fieldName, timeFormat));
             }
         }
-        return extraInfoSb.toString();
     }
 
     public List<LLMReq.ElementValue> getValues(ChatQueryContext queryCtx, Long dataSetId) {
-        Map<Long, String> itemIdToName = getItemIdToName(queryCtx, dataSetId);
         List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId);
         if (CollectionUtils.isEmpty(matchedElements)) {
             return new ArrayList<>();
@@ -203,20 +204,13 @@ public class LLMRequestService {
                 })
                 .map(elementMatch -> {
                     LLMReq.ElementValue elementValue = new LLMReq.ElementValue();
-                    elementValue.setFieldName(itemIdToName.get(elementMatch.getElement().getId()));
+                    elementValue.setFieldName(elementMatch.getElement().getName());
                     elementValue.setFieldValue(elementMatch.getWord());
                     return elementValue;
                 }).collect(Collectors.toSet());
         return new ArrayList<>(valueMatches);
     }
 
-    protected Map<Long, String> getItemIdToName(ChatQueryContext queryCtx, Long dataSetId) {
-        SemanticSchema semanticSchema = queryCtx.getSemanticSchema();
-        List<SchemaElement> elements = semanticSchema.getDimensions(dataSetId);
-        return elements.stream().collect(
-                Collectors.toMap(SchemaElement::getId, SchemaElement::getName, (value1, value2) -> value2));
-    }
-
     protected List<SchemaElement> getMatchedMetrics(ChatQueryContext queryCtx, Long dataSetId) {
         List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId);
         if (CollectionUtils.isEmpty(matchedElements)) {
@@ -235,40 +229,40 @@ public class LLMRequestService {
     }
 
     protected List<SchemaElement> getMatchedDimensions(ChatQueryContext queryCtx, Long dataSetId) {
         List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId);
-        if (CollectionUtils.isEmpty(matchedElements)) {
-            return Collections.emptyList();
-        }
-        return matchedElements.stream()
-                .filter(schemaElementMatch -> {
-                    SchemaElementType elementType = schemaElementMatch.getElement().getType();
-                    return SchemaElementType.DIMENSION.equals(elementType);
-                })
-                .map(schemaElementMatch -> schemaElementMatch.getElement())
-                .collect(Collectors.toList());
-    }
-
-    protected Set<String> getMatchedFieldNames(ChatQueryContext queryCtx, Long dataSetId) {
-        Map<Long, String> itemIdToName = getItemIdToName(queryCtx, dataSetId);
-        List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(dataSetId);
-        if (CollectionUtils.isEmpty(matchedElements)) {
-            return new HashSet<>();
-        }
-        return matchedElements.stream()
-                .filter(schemaElementMatch -> {
-                    SchemaElementType elementType = schemaElementMatch.getElement().getType();
-                    return SchemaElementType.METRIC.equals(elementType)
-                            || SchemaElementType.DIMENSION.equals(elementType)
-                            || SchemaElementType.VALUE.equals(elementType);
-                })
-                .map(schemaElementMatch -> {
-                    SchemaElement element = schemaElementMatch.getElement();
-                    SchemaElementType elementType = element.getType();
-                    if (SchemaElementType.VALUE.equals(elementType)) {
-                        return itemIdToName.get(element.getId());
-                    }
-                    return schemaElementMatch.getWord();
-                })
-                .collect(Collectors.toSet());
+        Set<SchemaElement> results = new HashSet<>();
+        if (!CollectionUtils.isEmpty(matchedElements)) {
+            results = matchedElements.stream()
+                    .filter(element -> SchemaElementType.DIMENSION.equals(element.getElement().getType()))
+                    .map(SchemaElementMatch::getElement)
+                    .collect(Collectors.toSet());
+        }
+        SemanticSchema semanticSchema = queryCtx.getSemanticSchema();
+        if (semanticSchema == null || semanticSchema.getDataSetSchemaMap() == null) {
+            return new ArrayList<>(results);
+        }
+        DataSetSchema dataSetSchema = semanticSchema.getDataSetSchemaMap().get(dataSetId);
+        if (dataSetSchema == null) {
+            return new ArrayList<>(results);
+        }
+        TimeDefaultConfig timeDefaultConfig = dataSetSchema.getTagTypeTimeDefaultConfig();
+        SchemaElement partitionDimension = dataSetSchema.getPartitionDimension();
+        if (timeDefaultConfig == null || partitionDimension == null) {
+            return new ArrayList<>(results);
+        }
+        if (Objects.equals(timeDefaultConfig.getUnit(), -1)) {
+            results.remove(partitionDimension);
+        } else {
+            results.add(partitionDimension);
+        }
+        return new ArrayList<>(results);
     }
 }

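The request service now derives the prior knowledge directly from the matched schema elements: metrics with DECIMAL/PERCENT formats get a unit hint, and dimensions with a time format are flagged as the partition time. A standalone sketch of the string being assembled; the field names and formats below are sample values, not taken from the project:

import java.util.LinkedHashMap;
import java.util.Map;

// Sketch: the shape of the prior knowledge string built from matched metrics and dimensions.
public class PriorKnowledgeDemo {
    public static void main(String[] args) {
        StringBuilder priorKnowledge = new StringBuilder();

        // Metrics whose data format is DECIMAL/PERCENT get a measurement-unit hint.
        Map<String, String> metricFormats = new LinkedHashMap<>();
        metricFormats.put("转化率", "percent");
        metricFormats.forEach((name, fmt) ->
                priorKnowledge.append(String.format("%s的计量单位是%s; ", name, "小数")));

        // Dimensions that declare a time format are described as the partition time.
        Map<String, String> dimensionTimeFormats = new LinkedHashMap<>();
        dimensionTimeFormats.put("数据日期", "yyyy-MM-dd");
        dimensionTimeFormats.forEach((name, fmt) ->
                priorKnowledge.append(String.format("%s是分区时间且格式是%s", name, fmt)));

        System.out.println(priorKnowledge);
        // prints: 转化率的计量单位是小数; 数据日期是分区时间且格式是yyyy-MM-dd
    }
}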
View File

@@ -7,81 +7,61 @@ import com.tencent.supersonic.common.pojo.ChatModelConfig;
 import com.tencent.supersonic.common.pojo.Text2SQLExemplar;
 import com.tencent.supersonic.headless.api.pojo.SchemaElement;
 import lombok.Data;
+import org.apache.commons.collections4.CollectionUtils;
 
+import java.util.ArrayList;
 import java.util.List;
+import java.util.stream.Collectors;
 
 @Data
 public class LLMReq {
     private String queryText;
-    private FilterCondition filterCondition;
     private LLMSchema schema;
     private List<ElementValue> linking;
     private String currentDate;
     private String priorExts;
     private SqlGenType sqlGenType;
     private ChatModelConfig modelConfig;
     private PromptConfig promptConfig;
     private List<Text2SQLExemplar> dynamicExemplars;
 
     @Data
     public static class ElementValue {
         private String fieldName;
         private String fieldValue;
     }
 
     @Data
     public static class LLMSchema {
-        private String domainName;
-        private String dataSetName;
         private Long dataSetId;
+        private String dataSetName;
         private List<String> fieldNameList;
         private List<SchemaElement> metrics;
         private List<SchemaElement> dimensions;
         private List<Term> terms;
-    }
 
-    @Data
-    public static class FilterCondition {
-        private String tableName;
+        public List<String> getFieldNameList() {
+            List<String> fieldNameList = new ArrayList<>();
+            if (CollectionUtils.isNotEmpty(metrics)) {
+                fieldNameList.addAll(metrics.stream().map(metric -> metric.getName()).collect(Collectors.toList()));
+            }
+            if (CollectionUtils.isNotEmpty(dimensions)) {
+                fieldNameList.addAll(dimensions.stream().map(metric -> metric.getName()).collect(Collectors.toList()));
+            }
+            return fieldNameList;
+        }
     }
 
     @Data
     public static class Term {
         private String name;
         private String description;
         private List<String> alias = Lists.newArrayList();
     }
 
     public enum SqlGenType {
         ONE_PASS_SELF_CONSISTENCY("1_pass_self_consistency");
 
         private String name;
 
         SqlGenType(String name) {

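With the field name list no longer populated by the request service, it is derived from the matched metrics and dimensions. A minimal sketch of that derivation, using a hypothetical stand-in Element record instead of the project's SchemaElement:

import java.util.ArrayList;
import java.util.List;

// Sketch: field names derived from the matched metrics and dimensions.
public class FieldNameListDemo {

    record Element(String name) {}

    static List<String> getFieldNameList(List<Element> metrics, List<Element> dimensions) {
        List<String> fieldNameList = new ArrayList<>();
        if (metrics != null) {
            metrics.forEach(m -> fieldNameList.add(m.name()));
        }
        if (dimensions != null) {
            dimensions.forEach(d -> fieldNameList.add(d.name()));
        }
        return fieldNameList;
    }

    public static void main(String[] args) {
        System.out.println(getFieldNameList(
                List.of(new Element("访问次数")),
                List.of(new Element("部门"), new Element("数据日期"))));
        // prints: [访问次数, 部门, 数据日期]
    }
}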
View File

@@ -26,11 +26,7 @@ class SchemaCorrectorTest {
             + " \"dataSetId\": 1,\n"
             + " \"llmReq\": {\n"
             + " \"queryText\": \"xxx2024年播放量最高的十首歌\",\n"
-            + " \"filterCondition\": {\n"
-            + " \"tableName\": null\n"
-            + " },\n"
             + " \"schema\": {\n"
-            + " \"domainName\": \"歌曲\",\n"
             + " \"dataSetName\": \"歌曲\",\n"
             + " \"fieldNameList\": [\n"
             + " \"商务组\",\n"

View File

@@ -1,50 +1,50 @@
 [
   {
     "question": "比较jackjchen和robinlee今年以来的访问次数",
-    "sideInfo": "CurrentDate=[2020-12-01],DomainTerms=[<核心用户 COMMENT '核心用户指tom和lucy'>]",
-    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[], Values[<用户='jackjchen'>,<用户='robinlee'>]",
+    "sideInfo": "CurrentDate=[2020-12-01],DomainTerms=[<核心用户 COMMENT '核心用户指tom和lucy'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[<数据日期>], Values[<用户='jackjchen'>,<用户='robinlee'>]",
     "sql": "SELECT 用户, 访问次数 FROM 超音数产品 WHERE 用户 IN ('jackjchen', 'robinlee') AND 数据日期 >= '2020-01-01' AND 数据日期 <= '2020-12-01'"
   },
   {
     "question": "超音数近12个月访问人数 按部门",
-    "sideInfo": "CurrentDate=[2022-11-06]",
-    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[<部门>], Values=[]",
+    "sideInfo": "CurrentDate=[2022-11-06],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>,<访问用户数 ALIAS 'UV,访问人数,' COMMENT '访问的用户个数' AGGREGATE 'COUNT'>,<人均访问次数 ALIAS '平均访问次数,' COMMENT '每个用户平均访问的次数'>], Dimensions=[<部门>,<数据日期>], Values=[]",
     "sql": "SELECT 部门, 数据日期, 访问人数 FROM 超音数产品 WHERE 数据日期 >= '2021-11-06' AND 数据日期 <= '2022-11-06'"
   },
   {
     "question": "超音数过去90天美术部、技术研发部的访问时长",
-    "sideInfo": "CurrentDate=[2023-04-21]",
-    "dbSchema": "Table=[超音数产品], Metrics=[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions=[], Values=[<部门='美术部'>,<部门='技术研发部'>]",
+    "sideInfo": "CurrentDate=[2023-04-21],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table=[超音数产品], Metrics=[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions=[<数据日期>], Values=[<部门='美术部'>,<部门='技术研发部'>]",
     "sql": "SELECT 部门, 访问时长 FROM 超音数产品 WHERE 部门 IN ('美术部', '技术研发部') AND 数据日期 >= '2023-01-20' AND 数据日期 <= '2023-04-21'"
   },
   {
     "question": "超音数访问时长小于1小时且来自美术部的用户是哪些",
-    "sideInfo": "CurrentDate=[2023-07-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>]",
-    "dbSchema": "Table:[超音数产品], Metrics:[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions:[<用户>], Values:[<部门='美术部'>]",
+    "sideInfo": "CurrentDate=[2023-07-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table:[超音数产品], Metrics:[<访问时长 COMMENT '一段时间内用户的访问时长' AGGREGATE 'SUM'>], Dimensions:[<用户>,<数据日期>], Values:[<部门='美术部'>]",
     "sql": "SELECT 用户 FROM 超音数产品 WHERE 部门 = '美术部' AND 访问时长 < 1"
   },
   {
     "question": "超音数本月pv最高的用户有哪些",
-    "sideInfo": "CurrentDate=[2023-08-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>]",
-    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<用户>], Values=[]",
+    "sideInfo": "CurrentDate=[2023-08-31],DomainTerms=[<核心用户 COMMENT '用户为tom和lucy'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<用户>,<数据日期>], Values=[]",
     "sql": "SELECT 用户 FROM 超音数产品 WHERE 数据日期 >= '2023-08-01' AND 数据日期 <= '2023-08-31' ORDER BY 访问次数 DESC LIMIT 1"
   },
   {
     "question": "超音数访问次数大于1k的部门是哪些",
-    "sideInfo": "CurrentDate=[2023-09-14]",
-    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>], Values=[]",
+    "sideInfo": "CurrentDate=[2023-09-14],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
     "sql": "SELECT 部门 FROM 超音数产品 WHERE 访问次数 > 1000"
   },
   {
     "question": "过去半个月核心用户的访问次数",
-    "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>]",
-    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>], Values=[]",
+    "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<核心用户 COMMENT '用户为alice'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<部门>,<数据日期>], Values=[]",
     "sql": "SELECT 用户,SUM(访问次数) FROM 超音数产品 WHERE 用户='alice' AND 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15'"
   },
   {
     "question": "过去半个月忠实用户有哪一些",
-    "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<忠实用户 COMMENT '一段时间内总访问次数大于100的用户'>]",
-    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<用户>], Values=[]",
+    "sideInfo": "CurrentDate=[2023-09-15],DomainTerms=[<忠实用户 COMMENT '一段时间内总访问次数大于100的用户'>],PriorKnowledge=[数据日期 是分区时间且格式是yyyy-MM-dd]",
+    "dbSchema": "Table=[超音数产品], Metrics=[<访问次数 ALIAS 'pv' COMMENT '一段时间内用户的访问次数' AGGREGATE 'SUM'>], Dimensions=[<用户>,<数据日期>], Values=[]",
     "sql": "SELECT 用户 FROM 超音数产品 WHERE 数据日期 >= '2023-09-01' AND 数据日期 <= '2023-09-15' GROUP BY 用户 HAVING SUM(访问次数) > 100"
   }
 ]