From 0a4272c25e8afa5581e0771501725b4f13f84a51 Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Sun, 4 Aug 2024 23:01:54 +0800 Subject: [PATCH] (improvement)(chat) Enable partition dates in the demo and support partition dates at the dataset granularity. (#1513) --- .../headless/chat/utils/QueryReqBuilder.java | 126 ++++++++++-------- .../tencent/supersonic/demo/S2VisitsDemo.java | 10 +- .../data/dictionary/custom/DimValue_1_2.txt | 22 +-- .../data/dictionary/custom/DimValue_1_3.txt | 12 +- .../data/dictionary/custom/DimValue_4_8.txt | 9 ++ .../data/dictionary/custom/DimValue_4_9.txt | 9 -- .../com/tencent/supersonic/chat/TagTest.java | 2 +- .../data/dictionary/custom/DimValue_1_2.txt | 22 +-- .../data/dictionary/custom/DimValue_1_3.txt | 12 +- .../data/dictionary/custom/DimValue_4_8.txt | 9 ++ .../data/dictionary/custom/DimValue_4_9.txt | 9 -- 11 files changed, 125 insertions(+), 117 deletions(-) create mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_8.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_9.txt create mode 100644 launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_8.txt delete mode 100644 launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_9.txt diff --git a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/utils/QueryReqBuilder.java b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/utils/QueryReqBuilder.java index 2fbaa2f89..5d9eb7be0 100644 --- a/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/utils/QueryReqBuilder.java +++ b/headless/chat/src/main/java/com/tencent/supersonic/headless/chat/utils/QueryReqBuilder.java @@ -18,20 +18,20 @@ import com.tencent.supersonic.headless.api.pojo.request.QueryMultiStructReq; import com.tencent.supersonic.headless.api.pojo.request.QuerySqlReq; import com.tencent.supersonic.headless.api.pojo.request.QueryStructReq; import com.tencent.supersonic.headless.chat.query.QueryManager; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; -import org.springframework.beans.BeanUtils; -import org.springframework.util.CollectionUtils; - import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.springframework.beans.BeanUtils; +import org.springframework.util.CollectionUtils; @Slf4j public class QueryReqBuilder { @@ -161,78 +161,86 @@ public class QueryReqBuilder { } private static List getAggregatorByMetric(AggregateTypeEnum aggregateType, SchemaElement metric) { - List aggregators = new ArrayList<>(); - if (metric != null) { - String agg = ""; - if (Objects.isNull(aggregateType) || aggregateType.equals(AggregateTypeEnum.NONE) - || AggOperatorEnum.COUNT_DISTINCT.name().equalsIgnoreCase(metric.getDefaultAgg())) { - if (StringUtils.isNotBlank(metric.getDefaultAgg())) { - agg = metric.getDefaultAgg(); - } - } else { - agg = aggregateType.name(); - } - aggregators.add(new Aggregator(metric.getBizName(), AggOperatorEnum.of(agg))); + if (metric == null) { + return Collections.emptyList(); } - return aggregators; + + String agg = determineAggregator(aggregateType, metric); + return Collections.singletonList(new Aggregator(metric.getBizName(), AggOperatorEnum.of(agg))); + } + + private static String determineAggregator(AggregateTypeEnum aggregateType, SchemaElement metric) { + if (aggregateType == null || aggregateType.equals(AggregateTypeEnum.NONE) + || AggOperatorEnum.COUNT_DISTINCT.name().equalsIgnoreCase(metric.getDefaultAgg())) { + return StringUtils.defaultIfBlank(metric.getDefaultAgg(), ""); + } + return aggregateType.name(); } private static void addDateDimension(SemanticParseInfo parseInfo) { - if (parseInfo != null) { - String queryMode = parseInfo.getQueryMode(); - if (parseInfo.getDateInfo() == null) { - return; - } - if (parseInfo.getAggType() != null && (parseInfo.getAggType().equals(AggregateTypeEnum.MAX) - || parseInfo.getAggType().equals(AggregateTypeEnum.MIN)) && !CollectionUtils.isEmpty( - parseInfo.getDimensions())) { - return; - } - DateConf dateInfo = parseInfo.getDateInfo(); - String dateField = getDateField(dateInfo); + if (parseInfo == null || parseInfo.getDateInfo() == null) { + return; + } - for (SchemaElement dimension : parseInfo.getDimensions()) { - if (dimension.getBizName().equalsIgnoreCase(dateField)) { - return; - } - } + if (shouldSkipAddingDateDimension(parseInfo)) { + return; + } - if (Objects.nonNull(parseInfo.getAggType()) && !parseInfo.getAggType().equals(AggregateTypeEnum.NONE)) { - return; - } + String dateField = getDateField(parseInfo.getDateInfo()); + if (isDateFieldAlreadyPresent(parseInfo, dateField)) { + return; + } - SchemaElement dimension = new SchemaElement(); - dimension.setBizName(dateField); + SchemaElement dimension = new SchemaElement(); + dimension.setBizName(dateField); - if (QueryManager.isMetricQuery(queryMode)) { - List timeDimensions = Arrays.asList(TimeDimensionEnum.DAY.getName(), - TimeDimensionEnum.WEEK.getName(), TimeDimensionEnum.MONTH.getName()); - Set dimensions = parseInfo.getDimensions().stream() - .filter(d -> !timeDimensions.contains(d.getBizName().toLowerCase())).collect( - Collectors.toSet()); - dimensions.add(dimension); - parseInfo.setDimensions(dimensions); - } + if (QueryManager.isMetricQuery(parseInfo.getQueryMode())) { + addDimension(parseInfo, dimension); } } - public static Set getOrder(Set parseOrder, AggregateTypeEnum aggregator, SchemaElement metric) { - if (!CollectionUtils.isEmpty(parseOrder)) { - return parseOrder; - } - Set orders = new LinkedHashSet(); - if (metric == null) { - return orders; + private static boolean shouldSkipAddingDateDimension(SemanticParseInfo parseInfo) { + return parseInfo.getAggType() != null + && (parseInfo.getAggType().equals(AggregateTypeEnum.MAX) + || parseInfo.getAggType().equals(AggregateTypeEnum.MIN)) + && !CollectionUtils.isEmpty(parseInfo.getDimensions()); + } + + private static boolean isDateFieldAlreadyPresent(SemanticParseInfo parseInfo, String dateField) { + return parseInfo.getDimensions().stream() + .anyMatch(dimension -> dimension.getBizName().equalsIgnoreCase(dateField)); + } + + private static void addDimension(SemanticParseInfo parseInfo, SchemaElement dimension) { + List timeDimensions = Arrays.asList(TimeDimensionEnum.DAY.getName(), + TimeDimensionEnum.WEEK.getName(), TimeDimensionEnum.MONTH.getName()); + Set dimensions = parseInfo.getDimensions().stream() + .filter(d -> !timeDimensions.contains(d.getBizName().toLowerCase())) + .collect(Collectors.toSet()); + dimensions.add(dimension); + parseInfo.setDimensions(dimensions); + } + + public static Set getOrder(Set existingOrders, + AggregateTypeEnum aggregator, SchemaElement metric) { + if (existingOrders != null && !existingOrders.isEmpty()) { + return existingOrders; } - if ((AggregateTypeEnum.TOPN.equals(aggregator) || AggregateTypeEnum.MAX.equals(aggregator) - || AggregateTypeEnum.MIN.equals( - aggregator))) { + if (metric == null) { + return Collections.emptySet(); + } + + Set orders = new LinkedHashSet<>(); + if (aggregator == AggregateTypeEnum.TOPN + || aggregator == AggregateTypeEnum.MAX + || aggregator == AggregateTypeEnum.MIN) { Order order = new Order(); order.setColumn(metric.getBizName()); order.setDirection("desc"); orders.add(order); } + return orders; } diff --git a/launchers/standalone/src/main/java/com/tencent/supersonic/demo/S2VisitsDemo.java b/launchers/standalone/src/main/java/com/tencent/supersonic/demo/S2VisitsDemo.java index c6bf89162..25d0e1013 100644 --- a/launchers/standalone/src/main/java/com/tencent/supersonic/demo/S2VisitsDemo.java +++ b/launchers/standalone/src/main/java/com/tencent/supersonic/demo/S2VisitsDemo.java @@ -189,7 +189,7 @@ public class S2VisitsDemo extends S2BaseDemo { } public ModelResp addModel_1(DomainResp s2Domain, DatabaseResp s2Database, - TagObjectResp s2TagObject) throws Exception { + TagObjectResp s2TagObject) throws Exception { ModelReq modelReq = new ModelReq(); modelReq.setName("用户部门"); modelReq.setBizName("user_department"); @@ -282,7 +282,7 @@ public class S2VisitsDemo extends S2BaseDemo { modelDetail.setIdentifiers(identifiers); List dimensions = new ArrayList<>(); - Dim dimension1 = new Dim("", "imp_date", DimensionType.time.name(), 0); + Dim dimension1 = new Dim("数据日期", "imp_date", DimensionType.partition_time.name(), 1); dimension1.setTypeParams(new DimensionTimeTypeParams()); dimensions.add(dimension1); Dim dimension2 = new Dim("页面", "page", DimensionType.categorical.name(), 1); @@ -352,7 +352,7 @@ public class S2VisitsDemo extends S2BaseDemo { } public void updateMetric(ModelResp stayTimeModel, DimensionResp departmentDimension, - DimensionResp userDimension) throws Exception { + DimensionResp userDimension) throws Exception { MetricResp stayHoursMetric = metricService.getMetric(stayTimeModel.getId(), "stay_hours"); MetricReq metricReq = new MetricReq(); @@ -378,7 +378,7 @@ public class S2VisitsDemo extends S2BaseDemo { } public void updateMetric_pv(ModelResp pvUvModel, DimensionResp departmentDimension, - DimensionResp userDimension, MetricResp metricPv) throws Exception { + DimensionResp userDimension, MetricResp metricPv) throws Exception { MetricReq metricReq = new MetricReq(); metricReq.setModelId(pvUvModel.getId()); metricReq.setId(metricPv.getId()); @@ -420,7 +420,7 @@ public class S2VisitsDemo extends S2BaseDemo { } public MetricResp addMetric_pv_avg(MetricResp metricPv, MetricResp metricUv, - DimensionResp departmentDimension, ModelResp pvModel) throws Exception { + DimensionResp departmentDimension, ModelResp pvModel) throws Exception { MetricReq metricReq = new MetricReq(); metricReq.setModelId(pvModel.getId()); metricReq.setName("人均访问次数"); diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_2.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_2.txt index b01cf97ae..5836055ca 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_2.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_2.txt @@ -5,14 +5,14 @@ dean _1_2 36 john _1_2 50 jack _1_2 38 admin _1_2 70 -周杰伦 _4_7 100 -陈奕迅 _4_7 100 -林俊杰 _4_7 100 -张碧晨 _4_7 100 -程响 _4_7 100 -Taylor#Swift _4_7 100 -内地 _4_4 100 -欧美 _4_4 100 -港台 _4_4 100 -流行 _4_6 100 -国风 _4_6 100 \ No newline at end of file +周杰伦 _4_8 100 +陈奕迅 _4_8 100 +林俊杰 _4_8 100 +张碧晨 _4_8 100 +程响 _4_8 100 +Taylor#Swift _4_8 100 +内地 _4_5 100 +欧美 _4_5 100 +港台 _4_5 100 +流行 _4_7 100 +国风 _4_7 100 \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_3.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_3.txt index e442a219c..c80af2b98 100644 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_3.txt +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_1_3.txt @@ -1,6 +1,6 @@ -p1 _3_3 52 -p2 _3_3 47 -p3 _3_3 31 -p4 _3_3 36 -p5 _3_3 50 -p6 _3_3 38 \ No newline at end of file +p1 _3_4 52 +p2 _3_4 47 +p3 _3_4 31 +p4 _3_4 36 +p5 _3_4 50 +p6 _3_4 38 \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_8.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_8.txt new file mode 100644 index 000000000..2067115b5 --- /dev/null +++ b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_8.txt @@ -0,0 +1,9 @@ +周杰伦 _4_8 9000 +周深 _4_8 8000 +周传雄 _4_8 7000 +周华建 _4_8 6000 +陈奕迅 _4_8 8000 +林俊杰 _4_8 7000 +张碧晨 _4_8 7000 +程响 _4_8 7000 +Taylor#Swift _4_8 7000 \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_9.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_9.txt deleted file mode 100644 index 3bc23d613..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/DimValue_4_9.txt +++ /dev/null @@ -1,9 +0,0 @@ -周杰伦 _4_7 9000 -周深 _4_7 8000 -周传雄 _4_7 7000 -周华建 _4_7 6000 -陈奕迅 _4_7 8000 -林俊杰 _4_7 7000 -张碧晨 _4_7 7000 -程响 _4_7 7000 -Taylor#Swift _4_7 7000 \ No newline at end of file diff --git a/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java b/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java index 1d4937780..d1a951c62 100644 --- a/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java +++ b/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java @@ -33,7 +33,7 @@ public class TagTest extends BaseTest { expectedParseInfo.setAggType(AggregateTypeEnum.NONE); QueryFilter dimensionFilter = DataUtils.getFilter("genre", FilterOperatorEnum.EQUALS, - "流行", "风格", 6L); + "流行", "风格", 7L); expectedParseInfo.getDimensionFilters().add(dimensionFilter); SchemaElement metric = SchemaElement.builder().name("播放量").build(); diff --git a/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_2.txt b/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_2.txt index b01cf97ae..5836055ca 100644 --- a/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_2.txt +++ b/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_2.txt @@ -5,14 +5,14 @@ dean _1_2 36 john _1_2 50 jack _1_2 38 admin _1_2 70 -周杰伦 _4_7 100 -陈奕迅 _4_7 100 -林俊杰 _4_7 100 -张碧晨 _4_7 100 -程响 _4_7 100 -Taylor#Swift _4_7 100 -内地 _4_4 100 -欧美 _4_4 100 -港台 _4_4 100 -流行 _4_6 100 -国风 _4_6 100 \ No newline at end of file +周杰伦 _4_8 100 +陈奕迅 _4_8 100 +林俊杰 _4_8 100 +张碧晨 _4_8 100 +程响 _4_8 100 +Taylor#Swift _4_8 100 +内地 _4_5 100 +欧美 _4_5 100 +港台 _4_5 100 +流行 _4_7 100 +国风 _4_7 100 \ No newline at end of file diff --git a/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_3.txt b/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_3.txt index e442a219c..c80af2b98 100644 --- a/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_3.txt +++ b/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_1_3.txt @@ -1,6 +1,6 @@ -p1 _3_3 52 -p2 _3_3 47 -p3 _3_3 31 -p4 _3_3 36 -p5 _3_3 50 -p6 _3_3 38 \ No newline at end of file +p1 _3_4 52 +p2 _3_4 47 +p3 _3_4 31 +p4 _3_4 36 +p5 _3_4 50 +p6 _3_4 38 \ No newline at end of file diff --git a/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_8.txt b/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_8.txt new file mode 100644 index 000000000..2067115b5 --- /dev/null +++ b/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_8.txt @@ -0,0 +1,9 @@ +周杰伦 _4_8 9000 +周深 _4_8 8000 +周传雄 _4_8 7000 +周华建 _4_8 6000 +陈奕迅 _4_8 8000 +林俊杰 _4_8 7000 +张碧晨 _4_8 7000 +程响 _4_8 7000 +Taylor#Swift _4_8 7000 \ No newline at end of file diff --git a/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_9.txt b/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_9.txt deleted file mode 100644 index 3bc23d613..000000000 --- a/launchers/standalone/src/test/resources/data/dictionary/custom/DimValue_4_9.txt +++ /dev/null @@ -1,9 +0,0 @@ -周杰伦 _4_7 9000 -周深 _4_7 8000 -周传雄 _4_7 7000 -周华建 _4_7 6000 -陈奕迅 _4_7 8000 -林俊杰 _4_7 7000 -张碧晨 _4_7 7000 -程响 _4_7 7000 -Taylor#Swift _4_7 7000 \ No newline at end of file