From 3ad18b0ee0b32b508e1a275540a1604ee0b9b84d Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Fri, 8 Mar 2024 16:59:51 +0800 Subject: [PATCH] (improvement)(Chat)Integration of tags into chat. (#798) --- .../chat/api/pojo/DataSetSchema.java | 31 ++-------------- .../chat/api/pojo/SemanticSchema.java | 17 +++++++-- .../core/mapper/EmbeddingMatchStrategy.java | 7 +++- .../chat/core/mapper/KeywordMapper.java | 3 +- .../chat/core/mapper/SearchMatchStrategy.java | 2 +- .../query/semantic/DataSetSchemaBuilder.java | 19 ++++++++++ .../listener/SchemaDictUpdateListener.java | 3 +- .../supersonic/common/pojo/Constants.java | 1 + .../common/pojo/enums/DictWordType.java | 36 ++++++++++++++----- .../supersonic/common/util/JsonUtil.java | 9 ++--- .../headless/api/pojo/SchemaElementType.java | 1 + .../api/pojo/response/DictItemResp.java | 6 +++- .../core/knowledge/SearchService.java | 6 ++-- .../builder/DimensionWordBuilder.java | 4 +-- .../knowledge/builder/EntityWordBuilder.java | 2 +- .../knowledge/builder/MetricWordBuilder.java | 4 +-- .../knowledge/builder/TagWordBuilder.java | 4 +-- .../knowledge/builder/WordBuilderFactory.java | 2 +- .../core/knowledge/helper/NatureHelper.java | 14 +++++--- .../service/impl/KnowledgeServiceImpl.java | 6 ++-- 20 files changed, 108 insertions(+), 69 deletions(-) diff --git a/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/DataSetSchema.java b/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/DataSetSchema.java index bf127623d..3d7939db6 100644 --- a/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/DataSetSchema.java +++ b/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/DataSetSchema.java @@ -19,6 +19,7 @@ public class DataSetSchema { private Set dimensions = new HashSet<>(); private Set dimensionValues = new HashSet<>(); private Set tags = new HashSet<>(); + private Set tagValues = new HashSet<>(); private SchemaElement entity = new SchemaElement(); private QueryConfig queryConfig; @@ -44,34 +45,8 @@ public class DataSetSchema { case TAG: element = tags.stream().filter(e -> e.getId() == elementID).findFirst(); break; - default: - } - - if (element.isPresent()) { - return element.get(); - } else { - return null; - } - } - - public SchemaElement getElement(SchemaElementType elementType, String name) { - Optional element = Optional.empty(); - - switch (elementType) { - case ENTITY: - element = Optional.ofNullable(entity); - break; - case DATASET: - element = Optional.of(dataSet); - break; - case METRIC: - element = metrics.stream().filter(e -> name.equals(e.getName())).findFirst(); - break; - case DIMENSION: - element = dimensions.stream().filter(e -> name.equals(e.getName())).findFirst(); - break; - case VALUE: - element = dimensionValues.stream().filter(e -> name.equals(e.getName())).findFirst(); + case TAG_VALUE: + element = tagValues.stream().filter(e -> e.getId() == elementID).findFirst(); break; default: } diff --git a/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/SemanticSchema.java b/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/SemanticSchema.java index cc5adfdd0..d9ee7949f 100644 --- a/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/SemanticSchema.java +++ b/chat/api/src/main/java/com/tencent/supersonic/chat/api/pojo/SemanticSchema.java @@ -46,6 +46,9 @@ public class SemanticSchema implements Serializable { case TAG: element = getElementsById(elementID, getTags()); break; + case TAG_VALUE: + element = getElementsById(elementID, getTagValues()); + break; default: } @@ -91,13 +94,21 @@ public class SemanticSchema implements Serializable { } public List getTags(Long dataSetId) { + List tags = getTags(); + return getElementsByDataSetId(dataSetId, tags); + } + + public List getTagValues() { List tags = new ArrayList<>(); - dataSetSchemaList.stream().filter(schemaElement -> - dataSetId.equals(schemaElement.getDataSet().getDataSet())) - .forEach(d -> tags.addAll(d.getTags())); + dataSetSchemaList.stream().forEach(d -> tags.addAll(d.getTagValues())); return tags; } + public List getTagValues(Long dataSetId) { + List tags = getTagValues(); + return getElementsByDataSetId(dataSetId, tags); + } + public List getMetrics() { List metrics = new ArrayList<>(); dataSetSchemaList.stream().forEach(d -> metrics.addAll(d.getMetrics())); diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/EmbeddingMatchStrategy.java b/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/EmbeddingMatchStrategy.java index 556802287..b359fecc9 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/EmbeddingMatchStrategy.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/EmbeddingMatchStrategy.java @@ -90,7 +90,12 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy { .map(retrieveQueryResult -> { List retrievals = retrieveQueryResult.getRetrieval(); if (CollectionUtils.isNotEmpty(retrievals)) { - retrievals.removeIf(retrieval -> retrieval.getDistance() > distance.doubleValue()); + retrievals.removeIf(retrieval -> { + if (!retrieveQueryResult.getQuery().contains(retrieval.getQuery())) { + return retrieval.getDistance() > distance.doubleValue(); + } + return false; + }); } return retrieveQueryResult; }) diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/KeywordMapper.java b/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/KeywordMapper.java index a286d4c17..b182395ec 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/KeywordMapper.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/KeywordMapper.java @@ -73,7 +73,8 @@ public class KeywordMapper extends BaseMapper { if (element == null) { continue; } - if (element.getType().equals(SchemaElementType.VALUE)) { + if (element.getType().equals(SchemaElementType.VALUE) || element.getType() + .equals(SchemaElementType.TAG_VALUE)) { element.setName(hanlpMapResult.getName()); } Long frequency = wordNatureToFrequency.get(hanlpMapResult.getName() + nature); diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/SearchMatchStrategy.java b/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/SearchMatchStrategy.java index 7af47379f..0535f2bea 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/SearchMatchStrategy.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/core/mapper/SearchMatchStrategy.java @@ -64,7 +64,7 @@ public class SearchMatchStrategy extends BaseMatchStrategy { // remove entity name where search hanlpMapResults = hanlpMapResults.stream().filter(entry -> { List natures = entry.getNatures().stream() - .filter(nature -> !nature.endsWith(DictWordType.ENTITY.getType())) + .filter(nature -> !nature.endsWith(DictWordType.ENTITY.getTypeWithSpilt())) .collect(Collectors.toList()); if (CollectionUtils.isEmpty(natures)) { return false; diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/core/query/semantic/DataSetSchemaBuilder.java b/chat/core/src/main/java/com/tencent/supersonic/chat/core/query/semantic/DataSetSchemaBuilder.java index 0a0585beb..23357dd8e 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/core/query/semantic/DataSetSchemaBuilder.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/core/query/semantic/DataSetSchemaBuilder.java @@ -51,6 +51,9 @@ public class DataSetSchemaBuilder { Set tags = getTags(resp); dataSetSchema.getTags().addAll(tags); + Set tagValues = getTagValues(resp); + dataSetSchema.getTagValues().addAll(tagValues); + SchemaElement entity = getEntity(resp); if (Objects.nonNull(entity)) { dataSetSchema.setEntity(entity); @@ -91,6 +94,22 @@ public class DataSetSchemaBuilder { return tags; } + private static Set getTagValues(DataSetSchemaResp resp) { + Set dimensionValues = new HashSet<>(); + for (TagResp tagResp : resp.getTags()) { + SchemaElement element = SchemaElement.builder() + .dataSet(resp.getId()) + .model(tagResp.getModelId()) + .id(tagResp.getId()) + .name(tagResp.getName()) + .bizName(tagResp.getBizName()) + .type(SchemaElementType.TAG_VALUE) + .build(); + dimensionValues.add(element); + } + return dimensionValues; + } + private static Set getDimensions(DataSetSchemaResp resp) { Set dimensions = new HashSet<>(); for (DimSchemaResp dim : resp.getDimensions()) { diff --git a/chat/server/src/main/java/com/tencent/supersonic/chat/server/listener/SchemaDictUpdateListener.java b/chat/server/src/main/java/com/tencent/supersonic/chat/server/listener/SchemaDictUpdateListener.java index 21ebfe4d7..97cea8c19 100644 --- a/chat/server/src/main/java/com/tencent/supersonic/chat/server/listener/SchemaDictUpdateListener.java +++ b/chat/server/src/main/java/com/tencent/supersonic/chat/server/listener/SchemaDictUpdateListener.java @@ -32,7 +32,8 @@ public class SchemaDictUpdateListener implements ApplicationListener DictWord dictWord = new DictWord(); dictWord.setWord(dataItem.getName()); String sign = DictWordType.NATURE_SPILT; - String nature = sign + 1 + sign + dataItem.getId() + dataItem.getType().name().toLowerCase(); + String suffixNature = DictWordType.getSuffixNature(dataItem.getType()); + String nature = sign + dataItem.getModelId() + dataItem.getId() + suffixNature; String natureWithFrequency = nature + " " + Constants.DEFAULT_FREQUENCY; dictWord.setNature(nature); dictWord.setNatureWithFrequency(natureWithFrequency); diff --git a/common/src/main/java/com/tencent/supersonic/common/pojo/Constants.java b/common/src/main/java/com/tencent/supersonic/common/pojo/Constants.java index ca53f33f0..7e34bc6e8 100644 --- a/common/src/main/java/com/tencent/supersonic/common/pojo/Constants.java +++ b/common/src/main/java/com/tencent/supersonic/common/pojo/Constants.java @@ -15,6 +15,7 @@ public class Constants { public static final String COLON = ":"; public static final String MINUS = "-"; public static final String UNDERLINE = "_"; + public static final String DICT_VALUE = "v"; public static final String UNDERLINE_DOUBLE = "__"; public static final String PARENTHESES_START = "("; public static final String PARENTHESES_END = ")"; diff --git a/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java b/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java index 03670f8cc..33f2f1781 100644 --- a/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java +++ b/common/src/main/java/com/tencent/supersonic/common/pojo/enums/DictWordType.java @@ -1,5 +1,6 @@ package com.tencent.supersonic.common.pojo.enums; +import java.util.Objects; import org.apache.commons.lang3.StringUtils; /*** @@ -12,15 +13,17 @@ public enum DictWordType { DIMENSION("dimension"), - VALUE("v"), + VALUE("dv"), - VIEW("view"), + DATASET("dataset"), ENTITY("entity"), NUMBER("m"), - TAG("t"), + TAG("tag"), + + TAG_VALUE("tv"), SUFFIX("suffix"); @@ -32,7 +35,7 @@ public enum DictWordType { this.type = type; } - public String getType() { + public String getTypeWithSpilt() { return NATURE_SPILT + type; } @@ -41,19 +44,36 @@ public enum DictWordType { return null; } for (DictWordType dictWordType : values()) { - if (nature.endsWith(dictWordType.getType())) { + if (nature.endsWith(dictWordType.getTypeWithSpilt())) { return dictWordType; } } - //view + //dataSet String[] natures = nature.split(DictWordType.NATURE_SPILT); if (natures.length == 2 && StringUtils.isNumeric(natures[1])) { - return VIEW; + return DATASET; } //dimension value - if (natures.length == 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) { + if (natures.length >= 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) { return VALUE; } return null; } + + public static DictWordType of(TypeEnums type) { + for (DictWordType wordType : DictWordType.values()) { + if (wordType.name().equalsIgnoreCase(type.name())) { + return wordType; + } + } + return null; + } + + public static String getSuffixNature(TypeEnums type) { + DictWordType wordType = of(type); + if (Objects.nonNull(wordType)) { + return wordType.type; + } + return ""; + } } diff --git a/common/src/main/java/com/tencent/supersonic/common/util/JsonUtil.java b/common/src/main/java/com/tencent/supersonic/common/util/JsonUtil.java index 7b500c1ee..9090f00b6 100644 --- a/common/src/main/java/com/tencent/supersonic/common/util/JsonUtil.java +++ b/common/src/main/java/com/tencent/supersonic/common/util/JsonUtil.java @@ -13,11 +13,6 @@ import com.fasterxml.jackson.databind.type.CollectionType; import com.fasterxml.jackson.databind.type.MapType; import com.fasterxml.jackson.databind.type.TypeFactory; import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; -import lombok.Getter; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; - import java.io.IOException; import java.nio.charset.Charset; import java.security.InvalidParameterException; @@ -25,6 +20,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Set; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; @Slf4j @@ -32,7 +30,6 @@ public class JsonUtil { public static final JsonUtil INSTANCE = new JsonUtil(); - @Setter @Getter private final ObjectMapper objectMapper = new ObjectMapper(); diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java index 0f410f5cf..5c4b90dd5 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/SchemaElementType.java @@ -7,6 +7,7 @@ public enum SchemaElementType { VALUE, ENTITY, TAG, + TAG_VALUE, ID, DATE } diff --git a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java index f42a276fa..fe1cc7bde 100644 --- a/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java +++ b/headless/api/src/main/java/com/tencent/supersonic/headless/api/pojo/response/DictItemResp.java @@ -1,5 +1,6 @@ package com.tencent.supersonic.headless.api.pojo.response; +import static com.tencent.supersonic.common.pojo.Constants.DICT_VALUE; import static com.tencent.supersonic.common.pojo.Constants.UNDERLINE; import com.tencent.supersonic.common.pojo.enums.StatusEnum; @@ -10,6 +11,7 @@ import lombok.Data; @Data public class DictItemResp { + private Long id; private Long modelId; @@ -31,7 +33,9 @@ public class DictItemResp { private StatusEnum status; public String getNature() { - return UNDERLINE + modelId + UNDERLINE + itemId + UNDERLINE + type.name().toLowerCase().substring(0, 1); + return UNDERLINE + modelId + UNDERLINE + itemId + UNDERLINE + type.name().toLowerCase().substring(0, 1) + + DICT_VALUE; + } public String fetchDictFileName() { diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/SearchService.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/SearchService.java index eb1adef7c..4c6c4d8d5 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/SearchService.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/SearchService.java @@ -82,7 +82,7 @@ public class SearchService { entry -> { String name = entry.getKey().replace("#", " "); List natures = entry.getValue().stream() - .map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), "")) + .map(nature -> nature.replaceAll(DictWordType.SUFFIX.getTypeWithSpilt(), "")) .collect(Collectors.toList()); name = StringUtils.reverse(name); return new HanlpMapResult(name, natures, key); @@ -160,8 +160,8 @@ public class SearchService { if (Objects.nonNull(natures) && natures.length > 0) { trie.put(dictWord.getWord(), getValue(natures)); } - if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature() - .contains(DictWordType.DIMENSION.getType())) { + if (dictWord.getNature().contains(DictWordType.METRIC.getTypeWithSpilt()) || dictWord.getNature() + .contains(DictWordType.DIMENSION.getTypeWithSpilt())) { suffixTrie.remove(dictWord.getWord()); } } diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/DimensionWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/DimensionWordBuilder.java index 50c22cc59..41dec4857 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/DimensionWordBuilder.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/DimensionWordBuilder.java @@ -31,10 +31,10 @@ public class DimensionWordBuilder extends BaseWordWithAliasBuilder { dictWord.setWord(word); Long modelId = schemaElement.getModel(); String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.DIMENSION.getType(); + + DictWordType.DIMENSION.getTypeWithSpilt(); if (isSuffix) { nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType(); + + DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.DIMENSION.getTypeWithSpilt(); } dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); return dictWord; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/EntityWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/EntityWordBuilder.java index b3f07a505..36756c7f9 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/EntityWordBuilder.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/EntityWordBuilder.java @@ -29,7 +29,7 @@ public class EntityWordBuilder extends BaseWordWithAliasBuilder { @Override public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) { String nature = DictWordType.NATURE_SPILT + schemaElement.getModel() - + DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getType(); + + DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getTypeWithSpilt(); DictWord dictWord = new DictWord(); dictWord.setWord(word); dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature)); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/MetricWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/MetricWordBuilder.java index 9f86fc905..b30486635 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/MetricWordBuilder.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/MetricWordBuilder.java @@ -31,10 +31,10 @@ public class MetricWordBuilder extends BaseWordWithAliasBuilder { dictWord.setWord(word); Long modelId = schemaElement.getModel(); String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.METRIC.getType(); + + DictWordType.METRIC.getTypeWithSpilt(); if (isSuffix) { nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType(); + + DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.METRIC.getTypeWithSpilt(); } dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); return dictWord; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/TagWordBuilder.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/TagWordBuilder.java index c8b79c707..5e7366ab7 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/TagWordBuilder.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/TagWordBuilder.java @@ -28,10 +28,10 @@ public class TagWordBuilder extends BaseWordWithAliasBuilder { dictWord.setWord(word); Long modelId = schemaElement.getModel(); String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.TAG.getType(); + + DictWordType.TAG.getTypeWithSpilt(); if (isSuffix) { nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId() - + DictWordType.SUFFIX.getType() + DictWordType.TAG.getType(); + + DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.TAG.getTypeWithSpilt(); } dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature)); return dictWord; diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/WordBuilderFactory.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/WordBuilderFactory.java index bf650947c..21ef104ba 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/WordBuilderFactory.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/builder/WordBuilderFactory.java @@ -15,7 +15,7 @@ public class WordBuilderFactory { static { wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder()); wordNatures.put(DictWordType.METRIC, new MetricWordBuilder()); - wordNatures.put(DictWordType.VIEW, new ModelWordBuilder()); + wordNatures.put(DictWordType.DATASET, new ModelWordBuilder()); wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder()); wordNatures.put(DictWordType.VALUE, new ValueWordBuilder()); wordNatures.put(DictWordType.TAG, new TagWordBuilder()); diff --git a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/helper/NatureHelper.java b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/helper/NatureHelper.java index 8daa3f75e..5f127651c 100644 --- a/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/helper/NatureHelper.java +++ b/headless/core/src/main/java/com/tencent/supersonic/headless/core/knowledge/helper/NatureHelper.java @@ -40,7 +40,7 @@ public class NatureHelper { case ENTITY: result = SchemaElementType.ENTITY; break; - case VIEW: + case DATASET: result = SchemaElementType.DATASET; break; case VALUE: @@ -49,6 +49,9 @@ public class NatureHelper { case TAG: result = SchemaElementType.TAG; break; + case TAG_VALUE: + result = SchemaElementType.TAG_VALUE; + break; default: break; } @@ -57,7 +60,7 @@ public class NatureHelper { private static boolean isDataSetOrEntity(S2Term term, Integer model) { return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString() - .endsWith(DictWordType.ENTITY.getType()); + .endsWith(DictWordType.ENTITY.getTypeWithSpilt()); } public static Integer getDataSetByNature(Nature nature) { @@ -131,7 +134,8 @@ public class NatureHelper { if (split.length <= 1) { return false; } - return !nature.endsWith(DictWordType.METRIC.getType()) && !nature.endsWith(DictWordType.DIMENSION.getType()) + return !nature.endsWith(DictWordType.METRIC.getTypeWithSpilt()) && !nature.endsWith( + DictWordType.DIMENSION.getTypeWithSpilt()) && StringUtils.isNumeric(split[1]); } @@ -154,12 +158,12 @@ public class NatureHelper { private static long getDimensionCount(List terms) { return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString() - .endsWith(DictWordType.DIMENSION.getType())).count(); + .endsWith(DictWordType.DIMENSION.getTypeWithSpilt())).count(); } private static long getMetricCount(List terms) { return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString() - .endsWith(DictWordType.METRIC.getType())).count(); + .endsWith(DictWordType.METRIC.getTypeWithSpilt())).count(); } /** diff --git a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/KnowledgeServiceImpl.java b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/KnowledgeServiceImpl.java index 9ef611caa..050c7d821 100644 --- a/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/KnowledgeServiceImpl.java +++ b/headless/server/src/main/java/com/tencent/supersonic/headless/server/service/impl/KnowledgeServiceImpl.java @@ -31,7 +31,7 @@ public class KnowledgeServiceImpl implements KnowledgeService { public void updateSemanticKnowledge(List natures) { List prefixes = natures.stream() - .filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType())) + .filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt())) .collect(Collectors.toList()); for (DictWord nature : prefixes) { @@ -39,7 +39,7 @@ public class KnowledgeServiceImpl implements KnowledgeService { } List suffixes = natures.stream() - .filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType())) + .filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt())) .collect(Collectors.toList()); SearchService.loadSuffix(suffixes); @@ -80,7 +80,7 @@ public class KnowledgeServiceImpl implements KnowledgeService { } public List prefixSearchByModel(String key, int limit, - Map> modelIdToDataSetIds) { + Map> modelIdToDataSetIds) { return SearchService.prefixSearch(key, limit, modelIdToDataSetIds); }