From 87332319760db6cd29ca4bfdab03e407a3c4e947 Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Mon, 18 Mar 2024 12:07:49 +0800 Subject: [PATCH] (improvement)(Headless) Abstracted tags from dimensions and metrics. (#828) --- .../common/pojo/enums/DictWordType.java | 15 ++- .../headless/api/pojo/DataSetSchema.java | 7 +- .../headless/api/pojo/SchemaElementType.java | 5 +- .../headless/api/pojo/SemanticSchema.java | 39 +++---- .../api/pojo/request/ItemValueReq.java | 2 +- .../api/pojo/response/DictItemResp.java | 7 +- .../api/pojo/response/MeasureResp.java | 1 + .../core/chat/knowledge/KnowledgeService.java | 4 +- .../core/chat/knowledge/SearchService.java | 6 +- .../builder/DimensionWordBuilder.java | 4 +- .../knowledge/builder/EntityWordBuilder.java | 2 +- .../knowledge/builder/MetricWordBuilder.java | 4 +- .../knowledge/builder/TagWordBuilder.java | 41 ------- .../knowledge/builder/WordBuilderFactory.java | 1 - .../chat/knowledge/helper/NatureHelper.java | 16 +-- .../core/chat/mapper/EntityMapper.java | 6 +- .../core/chat/mapper/KeywordMapper.java | 3 +- .../core/chat/mapper/QueryFilterMapper.java | 4 +- .../core/chat/mapper/SearchMatchStrategy.java | 2 +- .../core/chat/parser/QueryTypeParser.java | 61 +++++++++- .../chat/parser/llm/LLMRequestService.java | 63 ++++------- .../parser/rule/ContextInheritParser.java | 1 - .../core/chat/parser/rule/RuleSqlParser.java | 19 +--- .../chat/query/rule/RuleSemanticQuery.java | 58 ++++------ .../chat/query/rule/tag/TagDetailQuery.java | 10 +- .../chat/query/rule/tag/TagFilterQuery.java | 13 +-- .../server/service/impl/QueryServiceImpl.java | 6 +- .../service/impl/TagMetaServiceImpl.java | 1 - .../service/impl/TagQueryServiceImpl.java | 2 +- .../server/service/impl/WordService.java | 1 - .../server/utils/DataSetSchemaBuilder.java | 104 ++++++++++-------- .../headless/server/utils/DictUtils.java | 25 +++-- .../supersonic/ModelDemoDataLoader.java | 1 - .../data/dictionary/custom/TagValue_4_1.txt | 3 - .../data/dictionary/custom/TagValue_4_2.txt | 2 - .../data/dictionary/custom/TagValue_4_4.txt | 6 - .../com/tencent/supersonic/chat/TagTest.java | 2 +- .../data/dictionary/custom/TagValue_4_1.txt | 3 - .../data/dictionary/custom/TagValue_4_2.txt | 2 - .../data/dictionary/custom/TagValue_4_4.txt | 6 - 40 files changed, 236 insertions(+), 322 deletions(-) delete mode 100644 headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/TagWordBuilder.java delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_1.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_2.txt delete mode 100644 launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_4.txt delete mode 100644 launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_1.txt delete mode 100644 launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_2.txt delete mode 100644 launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_4.txt diff --git a/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java b/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java index 33f2f1781..f8e4703f4 100644 --- a/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java +++ b/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java @@ -13,9 +13,9 @@ public enum DictWordType { DIMENSION("dimension"), - VALUE("dv"), + VALUE("value"), - DATASET("dataset"), + DATASET("dataSet"), ENTITY("entity"), @@ -23,8 +23,6 @@ public enum DictWordType { TAG("tag"), - TAG_VALUE("tv"), - SUFFIX("suffix"); public static final String NATURE_SPILT = "_"; @@ -35,7 +33,7 @@ public enum DictWordType { this.type = type; } - public String getTypeWithSpilt() { + public String getType() { return NATURE_SPILT + type; } @@ -44,7 +42,7 @@ public enum DictWordType { return null; } for (DictWordType dictWordType : values()) { - if (nature.endsWith(dictWordType.getTypeWithSpilt())) { + if (nature.endsWith(dictWordType.getType())) { return dictWordType; } } @@ -54,7 +52,7 @@ public enum DictWordType { return DATASET; } //dimension value - if (natures.length >= 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) { + if (natures.length == 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) { return VALUE; } return null; @@ -76,4 +74,5 @@ public enum DictWordType { } return ""; } -} + +} \ No newline at end of file diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java index 46a94abd5..ef0d198c4 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/DataSetSchema.java @@ -9,13 +9,11 @@ import java.util.Set; @Data public class DataSetSchema { - private SchemaElement dataSet; private Set metrics = new HashSet<>(); private Set dimensions = new HashSet<>(); - private Set dimensionValues = new HashSet<>(); private Set tags = new HashSet<>(); - private Set tagValues = new HashSet<>(); + private Set dimensionValues = new HashSet<>(); private SchemaElement entity = new SchemaElement(); private QueryConfig queryConfig; private QueryType queryType; @@ -42,9 +40,6 @@ public class DataSetSchema { case TAG: element = tags.stream().filter(e -> e.getId() == elementID).findFirst(); break; - case TAG_VALUE: - element = tagValues.stream().filter(e -> e.getId() == elementID).findFirst(); - break; default: } diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java index 5c4b90dd5..ee316eeb1 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java @@ -6,8 +6,7 @@ public enum SchemaElementType { DIMENSION, VALUE, ENTITY, - TAG, - TAG_VALUE, ID, - DATE + DATE, + TAG } diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SemanticSchema.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SemanticSchema.java index 79d083504..78a066530 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SemanticSchema.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SemanticSchema.java @@ -44,9 +44,6 @@ public class SemanticSchema implements Serializable { case TAG: element = getElementsById(elementID, getTags()); break; - case TAG_VALUE: - element = getElementsById(elementID, getTagValues()); - break; default: } @@ -85,28 +82,6 @@ public class SemanticSchema implements Serializable { return dimension.orElse(null); } - public List getTags() { - List tags = new ArrayList<>(); - dataSetSchemaList.stream().forEach(d -> tags.addAll(d.getTags())); - return tags; - } - - public List getTags(Long dataSetId) { - List tags = getTags(); - return getElementsByDataSetId(dataSetId, tags); - } - - public List getTagValues() { - List tags = new ArrayList<>(); - dataSetSchemaList.stream().forEach(d -> tags.addAll(d.getTagValues())); - return tags; - } - - public List getTagValues(Long dataSetId) { - List tags = getTagValues(); - return getElementsByDataSetId(dataSetId, tags); - } - public List getMetrics() { List metrics = new ArrayList<>(); dataSetSchemaList.stream().forEach(d -> metrics.addAll(d.getMetrics())); @@ -129,6 +104,20 @@ public class SemanticSchema implements Serializable { return getElementsByDataSetId(dataSetId, entities); } + public List getTags() { + List tags = new ArrayList<>(); + dataSetSchemaList.stream().forEach(d -> tags.addAll(d.getTags())); + return tags; + } + + public List getTags(Long dataSetId) { + List tags = new ArrayList<>(); + dataSetSchemaList.stream().filter(schemaElement -> + dataSetId.equals(schemaElement.getDataSet().getDataSet())) + .forEach(d -> tags.addAll(d.getTags())); + return tags; + } + private List getElementsByDataSetId(Long dataSetId, List elements) { return elements.stream() .filter(schemaElement -> dataSetId.equals(schemaElement.getDataSet())) diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/request/ItemValueReq.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/request/ItemValueReq.java index bb05a6a11..78818a4d4 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/request/ItemValueReq.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/request/ItemValueReq.java @@ -11,7 +11,7 @@ import javax.validation.constraints.NotNull; @ToString public class ItemValueReq { - private SchemaElementType type = SchemaElementType.TAG; + private SchemaElementType type; @NotNull private Long itemId; diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java index 65e531b2c..e3ffff340 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java @@ -1,6 +1,5 @@ package com.tencent.supersonic.headless.api.pojo.response; -import static com.tencent.supersonic.common.pojo.Constants.DICT_VALUE; import static com.tencent.supersonic.common.pojo.Constants.UNDERLINE; import com.tencent.supersonic.common.pojo.enums.StatusEnum; @@ -32,10 +31,8 @@ public class DictItemResp { @NotNull private StatusEnum status; - public String generateNature() { - return UNDERLINE + modelId + UNDERLINE + itemId + UNDERLINE + type.name().toLowerCase().substring(0, 1) - + DICT_VALUE; - + public String getNature() { + return UNDERLINE + modelId + UNDERLINE + itemId; } public String fetchDictFileName() { diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/MeasureResp.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/MeasureResp.java index 4ee4ff0be..5c1941400 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/MeasureResp.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/MeasureResp.java @@ -27,4 +27,5 @@ public class MeasureResp { private Long modelId; + private int isTag; } diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/KnowledgeService.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/KnowledgeService.java index 85eab7df4..f14e45d3b 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/KnowledgeService.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/KnowledgeService.java @@ -18,7 +18,7 @@ public class KnowledgeService { public void updateSemanticKnowledge(List natures) { List prefixes = natures.stream() - .filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt())) + .filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType())) .collect(Collectors.toList()); for (DictWord nature : prefixes) { @@ -26,7 +26,7 @@ public class KnowledgeService { } List suffixes = natures.stream() - .filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt())) + .filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType())) .collect(Collectors.toList()); SearchService.loadSuffix(suffixes); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/SearchService.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/SearchService.java index 46adffa7c..4174b82f1 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/SearchService.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/SearchService.java @@ -88,7 +88,7 @@ public class SearchService { entry -> { String name = entry.getKey().replace("#", " "); List natures = entry.getValue().stream() - .map(nature -> nature.replaceAll(DictWordType.SUFFIX.getTypeWithSpilt(), "")) + .map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), "")) .collect(Collectors.toList()); name = StringUtils.reverse(name); return new HanlpMapResult(name, natures, key); @@ -169,8 +169,8 @@ public class SearchService { if (Objects.nonNull(natures) && natures.length > 0) { trie.put(dictWord.getWord(), getValue(natures)); } - if (dictWord.getNature().contains(DictWordType.METRIC.getTypeWithSpilt()) || dictWord.getNature() - .contains(DictWordType.DIMENSION.getTypeWithSpilt())) { + if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature() + .contains(DictWordType.DIMENSION.getType())) { suffixTrie.remove(dictWord.getWord()); } } diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/DimensionWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/DimensionWordBuilder.java index 3c0f65e9e..172d00461 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/DimensionWordBuilder.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/DimensionWordBuilder.java @@ -31,10 +31,10 @@ public class DimensionWordBuilder extends BaseWordWithAliasBuilder { dictWord.setWord(word); Long modelId = schemaElement.getModel(); String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.DIMENSION.getTypeWithSpilt(); + + DictWordType.DIMENSION.getType(); if (isSuffix) { nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.DIMENSION.getTypeWithSpilt(); + + DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType(); } dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); return dictWord; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/EntityWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/EntityWordBuilder.java index 53529eb6f..6fc57e440 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/EntityWordBuilder.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/EntityWordBuilder.java @@ -29,7 +29,7 @@ public class EntityWordBuilder extends BaseWordWithAliasBuilder { @Override public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) { String nature = DictWordType.NATURE_SPILT + schemaElement.getModel() - + DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getTypeWithSpilt(); + + DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getType(); DictWord dictWord = new DictWord(); dictWord.setWord(word); dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature)); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/MetricWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/MetricWordBuilder.java index 2e54763c7..811c58524 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/MetricWordBuilder.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/MetricWordBuilder.java @@ -31,10 +31,10 @@ public class MetricWordBuilder extends BaseWordWithAliasBuilder { dictWord.setWord(word); Long modelId = schemaElement.getModel(); String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.METRIC.getTypeWithSpilt(); + + DictWordType.METRIC.getType(); if (isSuffix) { nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.METRIC.getTypeWithSpilt(); + + DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType(); } dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); return dictWord; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/TagWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/TagWordBuilder.java deleted file mode 100644 index 1ca53695b..000000000 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/TagWordBuilder.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.tencent.supersonic.headless.core.chat.knowledge.builder; - -import com.google.common.collect.Lists; -import com.tencent.supersonic.common.pojo.enums.DictWordType; -import com.tencent.supersonic.headless.api.pojo.SchemaElement; -import com.tencent.supersonic.headless.core.chat.knowledge.DictWord; - -import java.util.List; -import org.apache.commons.lang3.StringUtils; -import org.springframework.stereotype.Service; - -@Service -public class TagWordBuilder extends BaseWordWithAliasBuilder { - - @Override - public List doGet(String word, SchemaElement schemaElement) { - List result = Lists.newArrayList(); - result.add(getOneWordNature(word, schemaElement, false)); - result.addAll(getOneWordNatureAlias(schemaElement, false)); - String reverseWord = StringUtils.reverse(word); - if (!word.equalsIgnoreCase(reverseWord)) { - result.add(getOneWordNature(reverseWord, schemaElement, true)); - } - return result; - } - - public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) { - DictWord dictWord = new DictWord(); - dictWord.setWord(word); - Long modelId = schemaElement.getModel(); - String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.TAG.getTypeWithSpilt(); - if (isSuffix) { - nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.TAG.getTypeWithSpilt(); - } - dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); - return dictWord; - } - -} diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/WordBuilderFactory.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/WordBuilderFactory.java index bdcdfad87..4036ca576 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/WordBuilderFactory.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/builder/WordBuilderFactory.java @@ -18,7 +18,6 @@ public class WordBuilderFactory { wordNatures.put(DictWordType.DATASET, new ModelWordBuilder()); wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder()); wordNatures.put(DictWordType.VALUE, new ValueWordBuilder()); - wordNatures.put(DictWordType.TAG, new TagWordBuilder()); } public static BaseWordBuilder get(DictWordType strategyType) { diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java index c15282af1..4d11b47ea 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/knowledge/helper/NatureHelper.java @@ -46,12 +46,6 @@ public class NatureHelper { case VALUE: result = SchemaElementType.VALUE; break; - case TAG: - result = SchemaElementType.TAG; - break; - case TAG_VALUE: - result = SchemaElementType.TAG_VALUE; - break; default: break; } @@ -60,7 +54,7 @@ public class NatureHelper { private static boolean isDataSetOrEntity(S2Term term, Integer model) { return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString() - .endsWith(DictWordType.ENTITY.getTypeWithSpilt()); + .endsWith(DictWordType.ENTITY.getType()); } public static Integer getDataSetByNature(Nature nature) { @@ -134,8 +128,8 @@ public class NatureHelper { if (split.length <= 1) { return false; } - return !nature.endsWith(DictWordType.METRIC.getTypeWithSpilt()) && !nature.endsWith( - DictWordType.DIMENSION.getTypeWithSpilt()) + return !nature.endsWith(DictWordType.METRIC.getType()) && !nature.endsWith( + DictWordType.DIMENSION.getType()) && StringUtils.isNumeric(split[1]); } @@ -158,12 +152,12 @@ public class NatureHelper { private static long getDimensionCount(List terms) { return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString() - .endsWith(DictWordType.DIMENSION.getTypeWithSpilt())).count(); + .endsWith(DictWordType.DIMENSION.getType())).count(); } private static long getMetricCount(List terms) { return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString() - .endsWith(DictWordType.METRIC.getTypeWithSpilt())).count(); + .endsWith(DictWordType.METRIC.getType())).count(); } /** diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/EntityMapper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/EntityMapper.java index b49f1f7fd..b54744349 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/EntityMapper.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/EntityMapper.java @@ -33,10 +33,8 @@ public class EntityMapper extends BaseMapper { continue; } List valueSchemaElements = schemaElementMatchList.stream() - .filter(schemaElementMatch -> - SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType()) - || SchemaElementType.TAG_VALUE.equals(schemaElementMatch.getElement().getType() - )) + .filter(schemaElementMatch -> SchemaElementType.VALUE.equals( + schemaElementMatch.getElement().getType())) .collect(Collectors.toList()); for (SchemaElementMatch schemaElementMatch : valueSchemaElements) { if (!entity.getId().equals(schemaElementMatch.getElement().getId())) { diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/KeywordMapper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/KeywordMapper.java index 7100fafe2..d703c5d73 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/KeywordMapper.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/KeywordMapper.java @@ -71,8 +71,7 @@ public class KeywordMapper extends BaseMapper { if (element == null) { continue; } - if (element.getType().equals(SchemaElementType.VALUE) || element.getType() - .equals(SchemaElementType.TAG_VALUE)) { + if (element.getType().equals(SchemaElementType.VALUE)) { element.setName(hanlpMapResult.getName()); } Long frequency = wordNatureToFrequency.get(hanlpMapResult.getName() + nature); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/QueryFilterMapper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/QueryFilterMapper.java index a8fa0c21b..f048edd23 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/QueryFilterMapper.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/QueryFilterMapper.java @@ -19,12 +19,12 @@ import java.util.Set; import java.util.stream.Collectors; @Slf4j -public class QueryFilterMapper implements SchemaMapper { +public class QueryFilterMapper extends BaseMapper { private double similarity = 1.0; @Override - public void map(QueryContext queryContext) { + public void doMap(QueryContext queryContext) { Set dataSetIds = queryContext.getDataSetIds(); if (CollectionUtils.isEmpty(dataSetIds)) { return; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/SearchMatchStrategy.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/SearchMatchStrategy.java index f9d144a20..f5d99e26e 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/SearchMatchStrategy.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/mapper/SearchMatchStrategy.java @@ -65,7 +65,7 @@ public class SearchMatchStrategy extends BaseMatchStrategy { // remove entity name where search hanlpMapResults = hanlpMapResults.stream().filter(entry -> { List natures = entry.getNatures().stream() - .filter(nature -> !nature.endsWith(DictWordType.ENTITY.getTypeWithSpilt())) + .filter(nature -> !nature.endsWith(DictWordType.ENTITY.getType())) .collect(Collectors.toList()); if (CollectionUtils.isEmpty(natures)) { return false; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/QueryTypeParser.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/QueryTypeParser.java index eda8610b6..5d0f793a9 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/QueryTypeParser.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/QueryTypeParser.java @@ -1,13 +1,25 @@ package com.tencent.supersonic.headless.core.chat.parser; import com.tencent.supersonic.auth.api.authentication.pojo.User; +import com.tencent.supersonic.common.pojo.enums.QueryType; +import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum; +import com.tencent.supersonic.common.util.jsqlparser.SqlSelectHelper; +import com.tencent.supersonic.headless.api.pojo.SchemaElement; import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo; +import com.tencent.supersonic.headless.api.pojo.SemanticSchema; +import com.tencent.supersonic.headless.api.pojo.SqlInfo; import com.tencent.supersonic.headless.core.chat.query.SemanticQuery; +import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlQuery; +import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery; import com.tencent.supersonic.headless.core.pojo.ChatContext; import com.tencent.supersonic.headless.core.pojo.QueryContext; -import lombok.extern.slf4j.Slf4j; - import java.util.List; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.StringUtils; /** * QueryTypeParser resolves query type as either METRIC or TAG, or ID. @@ -25,9 +37,50 @@ public class QueryTypeParser implements SemanticParser { // 1.init S2SQL semanticQuery.initS2Sql(queryContext.getSemanticSchema(), user); // 2.set queryType - SemanticParseInfo parseInfo = semanticQuery.getParseInfo(); - parseInfo.setQueryType(queryContext.getQueryType(parseInfo.getDataSetId())); + QueryType queryType = getQueryType(queryContext, semanticQuery); + semanticQuery.getParseInfo().setQueryType(queryType); } } + private QueryType getQueryType(QueryContext queryContext, SemanticQuery semanticQuery) { + SemanticParseInfo parseInfo = semanticQuery.getParseInfo(); + SqlInfo sqlInfo = parseInfo.getSqlInfo(); + if (Objects.isNull(sqlInfo) || StringUtils.isBlank(sqlInfo.getS2SQL())) { + return QueryType.ID; + } + //1. entity queryType + Long dataSetId = parseInfo.getDataSetId(); + SemanticSchema semanticSchema = queryContext.getSemanticSchema(); + if (semanticQuery instanceof RuleSemanticQuery || semanticQuery instanceof LLMSqlQuery) { + //If all the fields in the SELECT statement are of tag type. + List whereFields = SqlSelectHelper.getWhereFields(sqlInfo.getS2SQL()) + .stream().filter(field -> !TimeDimensionEnum.containsTimeDimension(field)) + .collect(Collectors.toList()); + + if (CollectionUtils.isNotEmpty(whereFields)) { + Set ids = semanticSchema.getEntities(dataSetId).stream().map(SchemaElement::getName) + .collect(Collectors.toSet()); + if (CollectionUtils.isNotEmpty(ids) && ids.stream().anyMatch(whereFields::contains)) { + return QueryType.ID; + } + Set tags = semanticSchema.getTags(dataSetId).stream().map(SchemaElement::getName) + .collect(Collectors.toSet()); + if (CollectionUtils.isNotEmpty(tags) && tags.containsAll(whereFields)) { + return QueryType.TAG; + } + } + } + //2. metric queryType + List selectFields = SqlSelectHelper.getSelectFields(sqlInfo.getS2SQL()); + List metrics = semanticSchema.getMetrics(dataSetId); + if (CollectionUtils.isNotEmpty(metrics)) { + Set metricNameSet = metrics.stream().map(SchemaElement::getName).collect(Collectors.toSet()); + boolean containMetric = selectFields.stream().anyMatch(metricNameSet::contains); + if (containMetric) { + return QueryType.METRIC; + } + } + return QueryType.ID; + } + } diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/LLMRequestService.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/LLMRequestService.java index 779102861..5410870c9 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/LLMRequestService.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/llm/LLMRequestService.java @@ -1,7 +1,6 @@ package com.tencent.supersonic.headless.core.chat.parser.llm; import com.tencent.supersonic.common.pojo.enums.DataFormatTypeEnum; -import com.tencent.supersonic.common.pojo.enums.QueryType; import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum; import com.tencent.supersonic.common.util.DateUtils; import com.tencent.supersonic.headless.api.pojo.SchemaElement; @@ -17,13 +16,6 @@ import com.tencent.supersonic.headless.core.config.OptimizationConfig; import com.tencent.supersonic.headless.core.pojo.QueryContext; import com.tencent.supersonic.headless.core.utils.ComponentFactory; import com.tencent.supersonic.headless.core.utils.S2SqlDateHelper; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.tuple.Pair; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; -import org.springframework.util.CollectionUtils; - import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; @@ -32,10 +24,17 @@ import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.util.CollectionUtils; @Slf4j @Service public class LLMRequestService { + @Autowired private LLMParserConfig llmParserConfig; @Autowired @@ -62,7 +61,7 @@ public class LLMRequestService { } public LLMReq getLlmReq(QueryContext queryCtx, Long dataSetId, - SemanticSchema semanticSchema, List linkingValues) { + SemanticSchema semanticSchema, List linkingValues) { Map dataSetIdToName = semanticSchema.getDataSetIdToName(); String queryText = queryCtx.getQueryText(); @@ -154,8 +153,7 @@ public class LLMRequestService { .filter(elementMatch -> !elementMatch.isInherited()) .filter(schemaElementMatch -> { SchemaElementType type = schemaElementMatch.getElement().getType(); - return SchemaElementType.VALUE.equals(type) || SchemaElementType.TAG_VALUE.equals(type) - || SchemaElementType.ID.equals(type); + return SchemaElementType.VALUE.equals(type) || SchemaElementType.ID.equals(type); }) .map(elementMatch -> { ElementValue elementValue = new ElementValue(); @@ -169,9 +167,6 @@ public class LLMRequestService { protected Map getItemIdToName(QueryContext queryCtx, Long dataSetId) { SemanticSchema semanticSchema = queryCtx.getSemanticSchema(); List elements = semanticSchema.getDimensions(dataSetId); - if (QueryType.TAG.equals(queryCtx.getQueryType(dataSetId))) { - elements = semanticSchema.getTags(dataSetId); - } return elements.stream() .collect(Collectors.toMap(SchemaElement::getId, SchemaElement::getName, (value1, value2) -> value2)); } @@ -179,27 +174,18 @@ public class LLMRequestService { private Set getTopNFieldNames(QueryContext queryCtx, Long dataSetId, LLMParserConfig llmParserConfig) { SemanticSchema semanticSchema = queryCtx.getSemanticSchema(); Set results = new HashSet<>(); - if (QueryType.TAG.equals(queryCtx.getQueryType(dataSetId))) { - Set tags = semanticSchema.getTags(dataSetId).stream() - .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) - .limit(llmParserConfig.getDimensionTopN()) - .map(entry -> entry.getName()) - .collect(Collectors.toSet()); - results.addAll(tags); - } else { - Set dimensions = semanticSchema.getDimensions(dataSetId).stream() - .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) - .limit(llmParserConfig.getDimensionTopN()) - .map(entry -> entry.getName()) - .collect(Collectors.toSet()); - results.addAll(dimensions); - Set metrics = semanticSchema.getMetrics(dataSetId).stream() - .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) - .limit(llmParserConfig.getMetricTopN()) - .map(entry -> entry.getName()) - .collect(Collectors.toSet()); - results.addAll(metrics); - } + Set dimensions = semanticSchema.getDimensions(dataSetId).stream() + .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) + .limit(llmParserConfig.getDimensionTopN()) + .map(entry -> entry.getName()) + .collect(Collectors.toSet()); + results.addAll(dimensions); + Set metrics = semanticSchema.getMetrics(dataSetId).stream() + .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) + .limit(llmParserConfig.getMetricTopN()) + .map(entry -> entry.getName()) + .collect(Collectors.toSet()); + results.addAll(metrics); return results; } @@ -214,15 +200,12 @@ public class LLMRequestService { SchemaElementType elementType = schemaElementMatch.getElement().getType(); return SchemaElementType.METRIC.equals(elementType) || SchemaElementType.DIMENSION.equals(elementType) - || SchemaElementType.VALUE.equals(elementType) - || SchemaElementType.TAG.equals(elementType) - || SchemaElementType.TAG_VALUE.equals(elementType); + || SchemaElementType.VALUE.equals(elementType); }) .map(schemaElementMatch -> { SchemaElement element = schemaElementMatch.getElement(); SchemaElementType elementType = element.getType(); - if (SchemaElementType.VALUE.equals(elementType) || SchemaElementType.TAG_VALUE.equals( - elementType)) { + if (SchemaElementType.VALUE.equals(elementType)) { return itemIdToName.get(element.getId()); } return schemaElementMatch.getWord(); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/ContextInheritParser.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/ContextInheritParser.java index 995209e97..65039f195 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/ContextInheritParser.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/ContextInheritParser.java @@ -38,7 +38,6 @@ public class ContextInheritParser implements SemanticParser { new AbstractMap.SimpleEntry<>( SchemaElementType.VALUE, Arrays.asList(SchemaElementType.VALUE, SchemaElementType.DIMENSION)), new AbstractMap.SimpleEntry<>(SchemaElementType.ENTITY, Arrays.asList(SchemaElementType.ENTITY)), - new AbstractMap.SimpleEntry<>(SchemaElementType.TAG, Arrays.asList(SchemaElementType.TAG)), new AbstractMap.SimpleEntry<>(SchemaElementType.DATASET, Arrays.asList(SchemaElementType.DATASET)), new AbstractMap.SimpleEntry<>(SchemaElementType.ID, Arrays.asList(SchemaElementType.ID)) ).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/RuleSqlParser.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/RuleSqlParser.java index f3fa53f26..92de7be6e 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/RuleSqlParser.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/parser/rule/RuleSqlParser.java @@ -1,11 +1,8 @@ package com.tencent.supersonic.headless.core.chat.parser.rule; -import com.tencent.supersonic.common.pojo.enums.QueryType; import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch; import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo; -import com.tencent.supersonic.headless.api.pojo.SemanticParseInfo; import com.tencent.supersonic.headless.core.chat.parser.SemanticParser; -import com.tencent.supersonic.headless.core.chat.query.QueryManager; import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery; import com.tencent.supersonic.headless.core.pojo.ChatContext; import com.tencent.supersonic.headless.core.pojo.QueryContext; @@ -35,24 +32,10 @@ public class RuleSqlParser implements SemanticParser { List queries = RuleSemanticQuery.resolve(dataSetId, elementMatches, queryContext); for (RuleSemanticQuery query : queries) { query.fillParseInfo(queryContext, chatContext); - SemanticParseInfo parseInfo = query.getParseInfo(); - QueryType queryType = queryContext.getQueryType(parseInfo.getDataSetId()); - if (isRightQuery(parseInfo, queryType)) { - queryContext.getCandidateQueries().add(query); - } + queryContext.getCandidateQueries().add(query); } } auxiliaryParsers.stream().forEach(p -> p.parse(queryContext, chatContext)); } - - private boolean isRightQuery(SemanticParseInfo parseInfo, QueryType queryType) { - if (QueryType.TAG.equals(queryType) && QueryManager.isTagQuery(parseInfo.getQueryMode())) { - return true; - } - if (QueryType.METRIC.equals(queryType) && QueryManager.isMetricQuery(parseInfo.getQueryMode())) { - return true; - } - return false; - } } diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/RuleSemanticQuery.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/RuleSemanticQuery.java index cd841ea9a..cf0c516ff 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/RuleSemanticQuery.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/RuleSemanticQuery.java @@ -3,7 +3,6 @@ package com.tencent.supersonic.headless.core.chat.query.rule; import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum; -import com.tencent.supersonic.common.pojo.enums.QueryType; import com.tencent.supersonic.headless.api.pojo.SchemaElement; import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch; import com.tencent.supersonic.headless.api.pojo.SchemaElementType; @@ -18,10 +17,6 @@ import com.tencent.supersonic.headless.core.chat.query.QueryManager; import com.tencent.supersonic.headless.core.pojo.ChatContext; import com.tencent.supersonic.headless.core.pojo.QueryContext; import com.tencent.supersonic.headless.core.utils.QueryReqBuilder; -import lombok.ToString; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; - import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -30,6 +25,9 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.stream.Collectors; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; @Slf4j @ToString @@ -42,7 +40,7 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery { } public List match(List candidateElementMatches, - QueryContext queryCtx) { + QueryContext queryCtx) { return queryMatcher.match(candidateElementMatches); } @@ -101,22 +99,31 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery { parseInfo.setDataSet(semanticSchema.getDataSet(dataSetId)); Map> dim2Values = new HashMap<>(); Map> id2Values = new HashMap<>(); - Map> tag2Values = new HashMap<>(); for (SchemaElementMatch schemaMatch : parseInfo.getElementMatches()) { SchemaElement element = schemaMatch.getElement(); element.setOrder(1 - schemaMatch.getSimilarity()); switch (element.getType()) { case ID: - addToValues(semanticSchema, SchemaElementType.ENTITY, id2Values, schemaMatch); - break; - case TAG_VALUE: - addToValues(semanticSchema, SchemaElementType.TAG, tag2Values, schemaMatch); + SchemaElement entityElement = semanticSchema.getElement(SchemaElementType.ENTITY, element.getId()); + if (entityElement != null) { + if (id2Values.containsKey(element.getId())) { + id2Values.get(element.getId()).add(schemaMatch); + } else { + id2Values.put(element.getId(), new ArrayList<>(Arrays.asList(schemaMatch))); + } + } break; case VALUE: - addToValues(semanticSchema, SchemaElementType.DIMENSION, dim2Values, schemaMatch); + SchemaElement dimElement = semanticSchema.getElement(SchemaElementType.DIMENSION, element.getId()); + if (dimElement != null) { + if (dim2Values.containsKey(element.getId())) { + dim2Values.get(element.getId()).add(schemaMatch); + } else { + dim2Values.put(element.getId(), new ArrayList<>(Arrays.asList(schemaMatch))); + } + } break; - case TAG: case DIMENSION: parseInfo.getDimensions().add(element); break; @@ -129,10 +136,8 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery { default: } } - addToFilters(id2Values, parseInfo, semanticSchema, SchemaElementType.ENTITY); addToFilters(dim2Values, parseInfo, semanticSchema, SchemaElementType.DIMENSION); - addToFilters(tag2Values, parseInfo, semanticSchema, SchemaElementType.TAG); } private void addToFilters(Map> id2Values, SemanticParseInfo parseInfo, @@ -220,8 +225,6 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery { public static List resolve(Long dataSetId, List candidateElementMatches, QueryContext queryContext) { List matchedQueries = new ArrayList<>(); - candidateElementMatches = filterByQueryType(dataSetId, candidateElementMatches, queryContext); - for (RuleSemanticQuery semanticQuery : QueryManager.getRuleQueries()) { List matches = semanticQuery.match(candidateElementMatches, queryContext); @@ -231,30 +234,9 @@ public abstract class RuleSemanticQuery extends BaseSemanticQuery { matchedQueries.add(query); } } - return matchedQueries; } - private static List filterByQueryType(Long dataSetId, - List candidateElementMatches, QueryContext queryContext) { - QueryType queryType = queryContext.getQueryType(dataSetId); - if (QueryType.TAG.equals(queryType)) { - candidateElementMatches = candidateElementMatches.stream() - .filter(elementMatch -> !(SchemaElementType.METRIC.equals(elementMatch.getElement().getType()) - || SchemaElementType.DIMENSION.equals(elementMatch.getElement().getType()) - || SchemaElementType.VALUE.equals(elementMatch.getElement().getType())) - ) - .collect(Collectors.toList()); - } - if (QueryType.METRIC.equals(queryType)) { - candidateElementMatches = candidateElementMatches.stream() - .filter(elementMatch -> !(SchemaElementType.TAG.equals(elementMatch.getElement().getType()) - || SchemaElementType.TAG_VALUE.equals(elementMatch.getElement().getType()))) - .collect(Collectors.toList()); - } - return candidateElementMatches; - } - protected QueryStructReq convertQueryStruct() { return QueryReqBuilder.buildStructReq(parseInfo); } diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagDetailQuery.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagDetailQuery.java index b18fb9e92..fccd89680 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagDetailQuery.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagDetailQuery.java @@ -1,11 +1,10 @@ package com.tencent.supersonic.headless.core.chat.query.rule.tag; -import org.springframework.stereotype.Component; - -import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.TAG; +import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.ID; import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED; import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST; -import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.ID; + +import org.springframework.stereotype.Component; @Component public class TagDetailQuery extends TagSemanticQuery { @@ -14,8 +13,7 @@ public class TagDetailQuery extends TagSemanticQuery { public TagDetailQuery() { super(); - queryMatcher.addOption(TAG, REQUIRED, AT_LEAST, 1) - .addOption(ID, REQUIRED, AT_LEAST, 1); + queryMatcher.addOption(ID, REQUIRED, AT_LEAST, 1); } @Override diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagFilterQuery.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagFilterQuery.java index f2c3344e9..f6b746056 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagFilterQuery.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/chat/query/rule/tag/TagFilterQuery.java @@ -1,14 +1,12 @@ package com.tencent.supersonic.headless.core.chat.query.rule.tag; +import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.VALUE; +import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED; +import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST; + import lombok.extern.slf4j.Slf4j; import org.springframework.stereotype.Component; -import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.TAG; -import static com.tencent.supersonic.headless.api.pojo.SchemaElementType.TAG_VALUE; -import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.OPTIONAL; -import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.OptionType.REQUIRED; -import static com.tencent.supersonic.headless.core.chat.query.rule.QueryMatchOption.RequireNumberType.AT_LEAST; - @Slf4j @Component public class TagFilterQuery extends TagListQuery { @@ -17,8 +15,7 @@ public class TagFilterQuery extends TagListQuery { public TagFilterQuery() { super(); - queryMatcher.addOption(TAG, OPTIONAL, AT_LEAST, 0); - queryMatcher.addOption(TAG_VALUE, REQUIRED, AT_LEAST, 1); + queryMatcher.addOption(VALUE, REQUIRED, AT_LEAST, 1); } @Override diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/QueryServiceImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/QueryServiceImpl.java index 9c260cd2a..3502f6934 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/QueryServiceImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/QueryServiceImpl.java @@ -146,11 +146,7 @@ public class QueryServiceImpl implements QueryService { queryStatement.setModelIds(querySqlReq.getModelIds()); queryStatement.setEnableOptimize(queryUtils.enableOptimize()); queryStatement.setSemanticSchemaResp(semanticSchemaResp); - if (QueryType.TAG.equals(semanticSchemaResp.getQueryType())) { - queryStatement.setSemanticModel(semanticSchemaManager.getTagSemanticModel(semanticSchemaResp)); - } else { - queryStatement.setSemanticModel(semanticSchemaManager.getSemanticModel(semanticSchemaResp)); - } + queryStatement.setSemanticModel(semanticSchemaManager.getSemanticModel(semanticSchemaResp)); return queryStatement; } diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagMetaServiceImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagMetaServiceImpl.java index 753443f74..f6870b51a 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagMetaServiceImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagMetaServiceImpl.java @@ -148,7 +148,6 @@ public class TagMetaServiceImpl implements TagMetaService { @Override public TagResp getTag(Long id, User user) { - // return convert(tagRepository.getTagById(id)); TagDO tagDO = tagRepository.getTagById(id); TagResp tagResp = fillCollectAndAdminInfo(tagDO, user); tagResp = fillModelInfo(tagResp); diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagQueryServiceImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagQueryServiceImpl.java index 8e368bae1..ccabcb2ac 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagQueryServiceImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/TagQueryServiceImpl.java @@ -60,7 +60,7 @@ public class TagQueryServiceImpl implements TagQueryService { public ItemValueResp queryTagValue(ItemValueReq itemValueReq, User user) throws Exception { ItemValueResp itemValueResp = new ItemValueResp(); itemValueResp.setItemId(itemValueReq.getItemId()); - itemValueResp.setType(SchemaElementType.TAG); + itemValueResp.setType(SchemaElementType.DIMENSION); TagResp tag = tagMetaService.getTag(itemValueReq.getItemId(), user); checkTag(tag); itemValueResp.setName(tag.getName()); diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/WordService.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/WordService.java index 19942dc98..e21d2c16c 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/WordService.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/WordService.java @@ -34,7 +34,6 @@ public class WordService { addWordsByType(DictWordType.METRIC, semanticSchema.getMetrics(), words); addWordsByType(DictWordType.ENTITY, semanticSchema.getEntities(), words); addWordsByType(DictWordType.VALUE, semanticSchema.getDimensionValues(), words); - addWordsByType(DictWordType.TAG, semanticSchema.getTags(), words); return words; } diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DataSetSchemaBuilder.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DataSetSchemaBuilder.java index 30506c936..f06454f91 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DataSetSchemaBuilder.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DataSetSchemaBuilder.java @@ -12,11 +12,6 @@ import com.tencent.supersonic.headless.api.pojo.SchemaValueMap; import com.tencent.supersonic.headless.api.pojo.response.DataSetSchemaResp; import com.tencent.supersonic.headless.api.pojo.response.DimSchemaResp; import com.tencent.supersonic.headless.api.pojo.response.MetricSchemaResp; -import com.tencent.supersonic.headless.api.pojo.response.TagResp; -import org.apache.logging.log4j.util.Strings; -import org.springframework.beans.BeanUtils; -import org.springframework.util.CollectionUtils; - import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -24,6 +19,9 @@ import java.util.List; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import org.apache.logging.log4j.util.Strings; +import org.springframework.beans.BeanUtils; +import org.springframework.util.CollectionUtils; public class DataSetSchemaBuilder { @@ -43,18 +41,18 @@ public class DataSetSchemaBuilder { Set metrics = getMetrics(resp); dataSetSchema.getMetrics().addAll(metrics); + Set metricTags = getMetricTags(resp); + dataSetSchema.getTags().addAll(metricTags); + Set dimensions = getDimensions(resp); dataSetSchema.getDimensions().addAll(dimensions); + Set dimensionTags = getDimensionTags(resp); + dataSetSchema.getTags().addAll(dimensionTags); + Set dimensionValues = getDimensionValues(resp); dataSetSchema.getDimensionValues().addAll(dimensionValues); - Set tags = getTags(resp); - dataSetSchema.getTags().addAll(tags); - - Set tagValues = getTagValues(resp); - dataSetSchema.getTagValues().addAll(tagValues); - SchemaElement entity = getEntity(resp); if (Objects.nonNull(entity)) { dataSetSchema.setEntity(entity); @@ -62,6 +60,58 @@ public class DataSetSchemaBuilder { return dataSetSchema; } + private static Set getMetricTags(DataSetSchemaResp resp) { + Set tags = new HashSet<>(); + for (MetricSchemaResp metric : resp.getMetrics()) { + List alias = SchemaItem.getAliasList(metric.getAlias()); + if (metric.getIsTag() == 1) { + SchemaElement tagToAdd = SchemaElement.builder() + .dataSet(resp.getId()) + .model(metric.getModelId()) + .id(metric.getId()) + .name(metric.getName()) + .bizName(metric.getBizName()) + .type(SchemaElementType.TAG) + .useCnt(metric.getUseCnt()) + .alias(alias) + .build(); + tags.add(tagToAdd); + } + } + return tags; + } + + private static Set getDimensionTags(DataSetSchemaResp resp) { + Set tags = new HashSet<>(); + for (DimSchemaResp dim : resp.getDimensions()) { + List alias = SchemaItem.getAliasList(dim.getAlias()); + List dimValueMaps = dim.getDimValueMaps(); + List schemaValueMaps = new ArrayList<>(); + if (!CollectionUtils.isEmpty(dimValueMaps)) { + for (DimValueMap dimValueMap : dimValueMaps) { + SchemaValueMap schemaValueMap = new SchemaValueMap(); + BeanUtils.copyProperties(dimValueMap, schemaValueMap); + schemaValueMaps.add(schemaValueMap); + } + } + if (dim.getIsTag() == 1) { + SchemaElement tagToAdd = SchemaElement.builder() + .dataSet(resp.getId()) + .model(dim.getModelId()) + .id(dim.getId()) + .name(dim.getName()) + .bizName(dim.getBizName()) + .type(SchemaElementType.TAG) + .useCnt(dim.getUseCnt()) + .alias(alias) + .schemaValueMaps(schemaValueMaps) + .build(); + tags.add(tagToAdd); + } + } + return tags; + } + private static SchemaElement getEntity(DataSetSchemaResp resp) { DimSchemaResp dim = resp.getPrimaryKey(); if (Objects.isNull(dim)) { @@ -79,38 +129,6 @@ public class DataSetSchemaBuilder { .build(); } - private static Set getTags(DataSetSchemaResp resp) { - Set tags = new HashSet<>(); - for (TagResp tagResp : resp.getTags()) { - SchemaElement element = SchemaElement.builder() - .dataSet(resp.getId()) - .model(tagResp.getModelId()) - .id(tagResp.getId()) - .name(tagResp.getName()) - .bizName(tagResp.getBizName()) - .type(SchemaElementType.TAG) - .build(); - tags.add(element); - } - return tags; - } - - private static Set getTagValues(DataSetSchemaResp resp) { - Set dimensionValues = new HashSet<>(); - for (TagResp tagResp : resp.getTags()) { - SchemaElement element = SchemaElement.builder() - .dataSet(resp.getId()) - .model(tagResp.getModelId()) - .id(tagResp.getId()) - .name(tagResp.getName()) - .bizName(tagResp.getBizName()) - .type(SchemaElementType.TAG_VALUE) - .build(); - dimensionValues.add(element); - } - return dimensionValues; - } - private static Set getDimensions(DataSetSchemaResp resp) { Set dimensions = new HashSet<>(); for (DimSchemaResp dim : resp.getDimensions()) { diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DictUtils.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DictUtils.java index 2529a9d09..33c89498e 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DictUtils.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/utils/DictUtils.java @@ -1,5 +1,11 @@ package com.tencent.supersonic.headless.server.utils; +import static com.tencent.supersonic.common.pojo.Constants.AND_UPPER; +import static com.tencent.supersonic.common.pojo.Constants.APOSTROPHE; +import static com.tencent.supersonic.common.pojo.Constants.COMMA; +import static com.tencent.supersonic.common.pojo.Constants.POUND; +import static com.tencent.supersonic.common.pojo.Constants.SPACE; + import com.google.common.base.Strings; import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.common.pojo.Aggregator; @@ -35,12 +41,6 @@ import com.tencent.supersonic.headless.server.service.MetricService; import com.tencent.supersonic.headless.server.service.ModelService; import com.tencent.supersonic.headless.server.service.QueryService; import com.tencent.supersonic.headless.server.service.TagMetaService; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.BeanUtils; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.Lazy; -import org.springframework.stereotype.Component; -import org.springframework.util.CollectionUtils; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.util.ArrayList; @@ -53,11 +53,12 @@ import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.StringJoiner; -import static com.tencent.supersonic.common.pojo.Constants.AND_UPPER; -import static com.tencent.supersonic.common.pojo.Constants.APOSTROPHE; -import static com.tencent.supersonic.common.pojo.Constants.COMMA; -import static com.tencent.supersonic.common.pojo.Constants.POUND; -import static com.tencent.supersonic.common.pojo.Constants.SPACE; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.BeanUtils; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Lazy; +import org.springframework.stereotype.Component; +import org.springframework.util.CollectionUtils; @Slf4j @Component @@ -185,7 +186,7 @@ public class DictUtils { mergeMultivaluedValue(valueAndFrequencyPair, dimValue, metric); } } - String nature = dictItemResp.generateNature(); + String nature = dictItemResp.getNature(); constructDictLines(valueAndFrequencyPair, lines, nature); addWhiteValueLines(dictItemResp, lines, nature); } catch (Exception e) { diff --git a/launchers/standalone/src/main/java/com/tencent/supersonic/ModelDemoDataLoader.java b/launchers/standalone/src/main/java/com/tencent/supersonic/ModelDemoDataLoader.java index 1475f5fc3..b13986380 100644 --- a/launchers/standalone/src/main/java/com/tencent/supersonic/ModelDemoDataLoader.java +++ b/launchers/standalone/src/main/java/com/tencent/supersonic/ModelDemoDataLoader.java @@ -110,7 +110,6 @@ public class ModelDemoDataLoader { updateDimension(); updateMetric(); updateMetric_pv(); - addTags(); addDataSet_1(); addDataSet_2(); addAuthGroup_1(); diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_1.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_1.txt deleted file mode 100644 index ef451bad0..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_1.txt +++ /dev/null @@ -1,3 +0,0 @@ -内地 _4_1_tv 100 -欧美 _4_1_tv 100 -港台 _4_1_tv 100 \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_2.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_2.txt deleted file mode 100644 index 4d9b8f8d0..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_2.txt +++ /dev/null @@ -1,2 +0,0 @@ -流行 _4_2_tv 100 -国风 _4_2_tv 100 \ No newline at end of file diff --git a/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_4.txt b/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_4.txt deleted file mode 100644 index 245ec3a95..000000000 --- a/launchers/standalone/src/main/resources/data/dictionary/custom/TagValue_4_4.txt +++ /dev/null @@ -1,6 +0,0 @@ -周杰伦 _4_4_tv 100 -陈奕迅 _4_4_tv 100 -林俊杰 _4_4_tv 100 -张碧晨 _4_4_tv 100 -程响 _4_4_tv 100 -Taylor#Swift _4_4_tv 100 \ No newline at end of file diff --git a/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java b/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java index 6210ea0b1..6ee8e495e 100644 --- a/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java +++ b/launchers/standalone/src/test/java/com/tencent/supersonic/chat/TagTest.java @@ -27,7 +27,7 @@ public class TagTest extends BaseTest { expectedParseInfo.setAggType(AggregateTypeEnum.NONE); QueryFilter dimensionFilter = DataUtils.getFilter("genre", FilterOperatorEnum.EQUALS, - "流行", "风格", 2L); + "流行", "风格", 6L); expectedParseInfo.getDimensionFilters().add(dimensionFilter); SchemaElement metric = SchemaElement.builder().name("播放量").build(); diff --git a/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_1.txt b/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_1.txt deleted file mode 100644 index ef451bad0..000000000 --- a/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_1.txt +++ /dev/null @@ -1,3 +0,0 @@ -内地 _4_1_tv 100 -欧美 _4_1_tv 100 -港台 _4_1_tv 100 \ No newline at end of file diff --git a/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_2.txt b/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_2.txt deleted file mode 100644 index 4d9b8f8d0..000000000 --- a/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_2.txt +++ /dev/null @@ -1,2 +0,0 @@ -流行 _4_2_tv 100 -国风 _4_2_tv 100 \ No newline at end of file diff --git a/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_4.txt b/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_4.txt deleted file mode 100644 index 245ec3a95..000000000 --- a/launchers/standalone/src/test/resources/data/dictionary/custom/TagValue_4_4.txt +++ /dev/null @@ -1,6 +0,0 @@ -周杰伦 _4_4_tv 100 -陈奕迅 _4_4_tv 100 -林俊杰 _4_4_tv 100 -张碧晨 _4_4_tv 100 -程响 _4_4_tv 100 -Taylor#Swift _4_4_tv 100 \ No newline at end of file