mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-12 12:37:55 +00:00
(improvement)(Chat)Integration of tags into chat. (#798)
This commit is contained in:
@@ -19,6 +19,7 @@ public class DataSetSchema {
|
|||||||
private Set<SchemaElement> dimensions = new HashSet<>();
|
private Set<SchemaElement> dimensions = new HashSet<>();
|
||||||
private Set<SchemaElement> dimensionValues = new HashSet<>();
|
private Set<SchemaElement> dimensionValues = new HashSet<>();
|
||||||
private Set<SchemaElement> tags = new HashSet<>();
|
private Set<SchemaElement> tags = new HashSet<>();
|
||||||
|
private Set<SchemaElement> tagValues = new HashSet<>();
|
||||||
private SchemaElement entity = new SchemaElement();
|
private SchemaElement entity = new SchemaElement();
|
||||||
private QueryConfig queryConfig;
|
private QueryConfig queryConfig;
|
||||||
|
|
||||||
@@ -44,34 +45,8 @@ public class DataSetSchema {
|
|||||||
case TAG:
|
case TAG:
|
||||||
element = tags.stream().filter(e -> e.getId() == elementID).findFirst();
|
element = tags.stream().filter(e -> e.getId() == elementID).findFirst();
|
||||||
break;
|
break;
|
||||||
default:
|
case TAG_VALUE:
|
||||||
}
|
element = tagValues.stream().filter(e -> e.getId() == elementID).findFirst();
|
||||||
|
|
||||||
if (element.isPresent()) {
|
|
||||||
return element.get();
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public SchemaElement getElement(SchemaElementType elementType, String name) {
|
|
||||||
Optional<SchemaElement> element = Optional.empty();
|
|
||||||
|
|
||||||
switch (elementType) {
|
|
||||||
case ENTITY:
|
|
||||||
element = Optional.ofNullable(entity);
|
|
||||||
break;
|
|
||||||
case DATASET:
|
|
||||||
element = Optional.of(dataSet);
|
|
||||||
break;
|
|
||||||
case METRIC:
|
|
||||||
element = metrics.stream().filter(e -> name.equals(e.getName())).findFirst();
|
|
||||||
break;
|
|
||||||
case DIMENSION:
|
|
||||||
element = dimensions.stream().filter(e -> name.equals(e.getName())).findFirst();
|
|
||||||
break;
|
|
||||||
case VALUE:
|
|
||||||
element = dimensionValues.stream().filter(e -> name.equals(e.getName())).findFirst();
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -46,6 +46,9 @@ public class SemanticSchema implements Serializable {
|
|||||||
case TAG:
|
case TAG:
|
||||||
element = getElementsById(elementID, getTags());
|
element = getElementsById(elementID, getTags());
|
||||||
break;
|
break;
|
||||||
|
case TAG_VALUE:
|
||||||
|
element = getElementsById(elementID, getTagValues());
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -91,13 +94,21 @@ public class SemanticSchema implements Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<SchemaElement> getTags(Long dataSetId) {
|
public List<SchemaElement> getTags(Long dataSetId) {
|
||||||
|
List<SchemaElement> tags = getTags();
|
||||||
|
return getElementsByDataSetId(dataSetId, tags);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<SchemaElement> getTagValues() {
|
||||||
List<SchemaElement> tags = new ArrayList<>();
|
List<SchemaElement> tags = new ArrayList<>();
|
||||||
dataSetSchemaList.stream().filter(schemaElement ->
|
dataSetSchemaList.stream().forEach(d -> tags.addAll(d.getTagValues()));
|
||||||
dataSetId.equals(schemaElement.getDataSet().getDataSet()))
|
|
||||||
.forEach(d -> tags.addAll(d.getTags()));
|
|
||||||
return tags;
|
return tags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<SchemaElement> getTagValues(Long dataSetId) {
|
||||||
|
List<SchemaElement> tags = getTagValues();
|
||||||
|
return getElementsByDataSetId(dataSetId, tags);
|
||||||
|
}
|
||||||
|
|
||||||
public List<SchemaElement> getMetrics() {
|
public List<SchemaElement> getMetrics() {
|
||||||
List<SchemaElement> metrics = new ArrayList<>();
|
List<SchemaElement> metrics = new ArrayList<>();
|
||||||
dataSetSchemaList.stream().forEach(d -> metrics.addAll(d.getMetrics()));
|
dataSetSchemaList.stream().forEach(d -> metrics.addAll(d.getMetrics()));
|
||||||
|
|||||||
@@ -90,7 +90,12 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
|
|||||||
.map(retrieveQueryResult -> {
|
.map(retrieveQueryResult -> {
|
||||||
List<Retrieval> retrievals = retrieveQueryResult.getRetrieval();
|
List<Retrieval> retrievals = retrieveQueryResult.getRetrieval();
|
||||||
if (CollectionUtils.isNotEmpty(retrievals)) {
|
if (CollectionUtils.isNotEmpty(retrievals)) {
|
||||||
retrievals.removeIf(retrieval -> retrieval.getDistance() > distance.doubleValue());
|
retrievals.removeIf(retrieval -> {
|
||||||
|
if (!retrieveQueryResult.getQuery().contains(retrieval.getQuery())) {
|
||||||
|
return retrieval.getDistance() > distance.doubleValue();
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
return retrieveQueryResult;
|
return retrieveQueryResult;
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -73,7 +73,8 @@ public class KeywordMapper extends BaseMapper {
|
|||||||
if (element == null) {
|
if (element == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (element.getType().equals(SchemaElementType.VALUE)) {
|
if (element.getType().equals(SchemaElementType.VALUE) || element.getType()
|
||||||
|
.equals(SchemaElementType.TAG_VALUE)) {
|
||||||
element.setName(hanlpMapResult.getName());
|
element.setName(hanlpMapResult.getName());
|
||||||
}
|
}
|
||||||
Long frequency = wordNatureToFrequency.get(hanlpMapResult.getName() + nature);
|
Long frequency = wordNatureToFrequency.get(hanlpMapResult.getName() + nature);
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
|||||||
// remove entity name where search
|
// remove entity name where search
|
||||||
hanlpMapResults = hanlpMapResults.stream().filter(entry -> {
|
hanlpMapResults = hanlpMapResults.stream().filter(entry -> {
|
||||||
List<String> natures = entry.getNatures().stream()
|
List<String> natures = entry.getNatures().stream()
|
||||||
.filter(nature -> !nature.endsWith(DictWordType.ENTITY.getType()))
|
.filter(nature -> !nature.endsWith(DictWordType.ENTITY.getTypeWithSpilt()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (CollectionUtils.isEmpty(natures)) {
|
if (CollectionUtils.isEmpty(natures)) {
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -51,6 +51,9 @@ public class DataSetSchemaBuilder {
|
|||||||
Set<SchemaElement> tags = getTags(resp);
|
Set<SchemaElement> tags = getTags(resp);
|
||||||
dataSetSchema.getTags().addAll(tags);
|
dataSetSchema.getTags().addAll(tags);
|
||||||
|
|
||||||
|
Set<SchemaElement> tagValues = getTagValues(resp);
|
||||||
|
dataSetSchema.getTagValues().addAll(tagValues);
|
||||||
|
|
||||||
SchemaElement entity = getEntity(resp);
|
SchemaElement entity = getEntity(resp);
|
||||||
if (Objects.nonNull(entity)) {
|
if (Objects.nonNull(entity)) {
|
||||||
dataSetSchema.setEntity(entity);
|
dataSetSchema.setEntity(entity);
|
||||||
@@ -91,6 +94,22 @@ public class DataSetSchemaBuilder {
|
|||||||
return tags;
|
return tags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Set<SchemaElement> getTagValues(DataSetSchemaResp resp) {
|
||||||
|
Set<SchemaElement> dimensionValues = new HashSet<>();
|
||||||
|
for (TagResp tagResp : resp.getTags()) {
|
||||||
|
SchemaElement element = SchemaElement.builder()
|
||||||
|
.dataSet(resp.getId())
|
||||||
|
.model(tagResp.getModelId())
|
||||||
|
.id(tagResp.getId())
|
||||||
|
.name(tagResp.getName())
|
||||||
|
.bizName(tagResp.getBizName())
|
||||||
|
.type(SchemaElementType.TAG_VALUE)
|
||||||
|
.build();
|
||||||
|
dimensionValues.add(element);
|
||||||
|
}
|
||||||
|
return dimensionValues;
|
||||||
|
}
|
||||||
|
|
||||||
private static Set<SchemaElement> getDimensions(DataSetSchemaResp resp) {
|
private static Set<SchemaElement> getDimensions(DataSetSchemaResp resp) {
|
||||||
Set<SchemaElement> dimensions = new HashSet<>();
|
Set<SchemaElement> dimensions = new HashSet<>();
|
||||||
for (DimSchemaResp dim : resp.getDimensions()) {
|
for (DimSchemaResp dim : resp.getDimensions()) {
|
||||||
|
|||||||
@@ -32,7 +32,8 @@ public class SchemaDictUpdateListener implements ApplicationListener<DataEvent>
|
|||||||
DictWord dictWord = new DictWord();
|
DictWord dictWord = new DictWord();
|
||||||
dictWord.setWord(dataItem.getName());
|
dictWord.setWord(dataItem.getName());
|
||||||
String sign = DictWordType.NATURE_SPILT;
|
String sign = DictWordType.NATURE_SPILT;
|
||||||
String nature = sign + 1 + sign + dataItem.getId() + dataItem.getType().name().toLowerCase();
|
String suffixNature = DictWordType.getSuffixNature(dataItem.getType());
|
||||||
|
String nature = sign + dataItem.getModelId() + dataItem.getId() + suffixNature;
|
||||||
String natureWithFrequency = nature + " " + Constants.DEFAULT_FREQUENCY;
|
String natureWithFrequency = nature + " " + Constants.DEFAULT_FREQUENCY;
|
||||||
dictWord.setNature(nature);
|
dictWord.setNature(nature);
|
||||||
dictWord.setNatureWithFrequency(natureWithFrequency);
|
dictWord.setNatureWithFrequency(natureWithFrequency);
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ public class Constants {
|
|||||||
public static final String COLON = ":";
|
public static final String COLON = ":";
|
||||||
public static final String MINUS = "-";
|
public static final String MINUS = "-";
|
||||||
public static final String UNDERLINE = "_";
|
public static final String UNDERLINE = "_";
|
||||||
|
public static final String DICT_VALUE = "v";
|
||||||
public static final String UNDERLINE_DOUBLE = "__";
|
public static final String UNDERLINE_DOUBLE = "__";
|
||||||
public static final String PARENTHESES_START = "(";
|
public static final String PARENTHESES_START = "(";
|
||||||
public static final String PARENTHESES_END = ")";
|
public static final String PARENTHESES_END = ")";
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
package com.tencent.supersonic.common.pojo.enums;
|
package com.tencent.supersonic.common.pojo.enums;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
/***
|
/***
|
||||||
@@ -12,15 +13,17 @@ public enum DictWordType {
|
|||||||
|
|
||||||
DIMENSION("dimension"),
|
DIMENSION("dimension"),
|
||||||
|
|
||||||
VALUE("v"),
|
VALUE("dv"),
|
||||||
|
|
||||||
VIEW("view"),
|
DATASET("dataset"),
|
||||||
|
|
||||||
ENTITY("entity"),
|
ENTITY("entity"),
|
||||||
|
|
||||||
NUMBER("m"),
|
NUMBER("m"),
|
||||||
|
|
||||||
TAG("t"),
|
TAG("tag"),
|
||||||
|
|
||||||
|
TAG_VALUE("tv"),
|
||||||
|
|
||||||
SUFFIX("suffix");
|
SUFFIX("suffix");
|
||||||
|
|
||||||
@@ -32,7 +35,7 @@ public enum DictWordType {
|
|||||||
this.type = type;
|
this.type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getType() {
|
public String getTypeWithSpilt() {
|
||||||
return NATURE_SPILT + type;
|
return NATURE_SPILT + type;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -41,19 +44,36 @@ public enum DictWordType {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
for (DictWordType dictWordType : values()) {
|
for (DictWordType dictWordType : values()) {
|
||||||
if (nature.endsWith(dictWordType.getType())) {
|
if (nature.endsWith(dictWordType.getTypeWithSpilt())) {
|
||||||
return dictWordType;
|
return dictWordType;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//view
|
//dataSet
|
||||||
String[] natures = nature.split(DictWordType.NATURE_SPILT);
|
String[] natures = nature.split(DictWordType.NATURE_SPILT);
|
||||||
if (natures.length == 2 && StringUtils.isNumeric(natures[1])) {
|
if (natures.length == 2 && StringUtils.isNumeric(natures[1])) {
|
||||||
return VIEW;
|
return DATASET;
|
||||||
}
|
}
|
||||||
//dimension value
|
//dimension value
|
||||||
if (natures.length == 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) {
|
if (natures.length >= 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) {
|
||||||
return VALUE;
|
return VALUE;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static DictWordType of(TypeEnums type) {
|
||||||
|
for (DictWordType wordType : DictWordType.values()) {
|
||||||
|
if (wordType.name().equalsIgnoreCase(type.name())) {
|
||||||
|
return wordType;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getSuffixNature(TypeEnums type) {
|
||||||
|
DictWordType wordType = of(type);
|
||||||
|
if (Objects.nonNull(wordType)) {
|
||||||
|
return wordType.type;
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,11 +13,6 @@ import com.fasterxml.jackson.databind.type.CollectionType;
|
|||||||
import com.fasterxml.jackson.databind.type.MapType;
|
import com.fasterxml.jackson.databind.type.MapType;
|
||||||
import com.fasterxml.jackson.databind.type.TypeFactory;
|
import com.fasterxml.jackson.databind.type.TypeFactory;
|
||||||
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
|
||||||
import lombok.Getter;
|
|
||||||
import lombok.Setter;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.security.InvalidParameterException;
|
import java.security.InvalidParameterException;
|
||||||
@@ -25,6 +20,9 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@@ -32,7 +30,6 @@ public class JsonUtil {
|
|||||||
|
|
||||||
public static final JsonUtil INSTANCE = new JsonUtil();
|
public static final JsonUtil INSTANCE = new JsonUtil();
|
||||||
|
|
||||||
@Setter
|
|
||||||
@Getter
|
@Getter
|
||||||
private final ObjectMapper objectMapper = new ObjectMapper();
|
private final ObjectMapper objectMapper = new ObjectMapper();
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ public enum SchemaElementType {
|
|||||||
VALUE,
|
VALUE,
|
||||||
ENTITY,
|
ENTITY,
|
||||||
TAG,
|
TAG,
|
||||||
|
TAG_VALUE,
|
||||||
ID,
|
ID,
|
||||||
DATE
|
DATE
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
package com.tencent.supersonic.headless.api.pojo.response;
|
package com.tencent.supersonic.headless.api.pojo.response;
|
||||||
|
|
||||||
|
import static com.tencent.supersonic.common.pojo.Constants.DICT_VALUE;
|
||||||
import static com.tencent.supersonic.common.pojo.Constants.UNDERLINE;
|
import static com.tencent.supersonic.common.pojo.Constants.UNDERLINE;
|
||||||
|
|
||||||
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
|
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
|
||||||
@@ -10,6 +11,7 @@ import lombok.Data;
|
|||||||
|
|
||||||
@Data
|
@Data
|
||||||
public class DictItemResp {
|
public class DictItemResp {
|
||||||
|
|
||||||
private Long id;
|
private Long id;
|
||||||
|
|
||||||
private Long modelId;
|
private Long modelId;
|
||||||
@@ -31,7 +33,9 @@ public class DictItemResp {
|
|||||||
private StatusEnum status;
|
private StatusEnum status;
|
||||||
|
|
||||||
public String getNature() {
|
public String getNature() {
|
||||||
return UNDERLINE + modelId + UNDERLINE + itemId + UNDERLINE + type.name().toLowerCase().substring(0, 1);
|
return UNDERLINE + modelId + UNDERLINE + itemId + UNDERLINE + type.name().toLowerCase().substring(0, 1)
|
||||||
|
+ DICT_VALUE;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String fetchDictFileName() {
|
public String fetchDictFileName() {
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ public class SearchService {
|
|||||||
entry -> {
|
entry -> {
|
||||||
String name = entry.getKey().replace("#", " ");
|
String name = entry.getKey().replace("#", " ");
|
||||||
List<String> natures = entry.getValue().stream()
|
List<String> natures = entry.getValue().stream()
|
||||||
.map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), ""))
|
.map(nature -> nature.replaceAll(DictWordType.SUFFIX.getTypeWithSpilt(), ""))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
name = StringUtils.reverse(name);
|
name = StringUtils.reverse(name);
|
||||||
return new HanlpMapResult(name, natures, key);
|
return new HanlpMapResult(name, natures, key);
|
||||||
@@ -160,8 +160,8 @@ public class SearchService {
|
|||||||
if (Objects.nonNull(natures) && natures.length > 0) {
|
if (Objects.nonNull(natures) && natures.length > 0) {
|
||||||
trie.put(dictWord.getWord(), getValue(natures));
|
trie.put(dictWord.getWord(), getValue(natures));
|
||||||
}
|
}
|
||||||
if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature()
|
if (dictWord.getNature().contains(DictWordType.METRIC.getTypeWithSpilt()) || dictWord.getNature()
|
||||||
.contains(DictWordType.DIMENSION.getType())) {
|
.contains(DictWordType.DIMENSION.getTypeWithSpilt())) {
|
||||||
suffixTrie.remove(dictWord.getWord());
|
suffixTrie.remove(dictWord.getWord());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,10 +31,10 @@ public class DimensionWordBuilder extends BaseWordWithAliasBuilder {
|
|||||||
dictWord.setWord(word);
|
dictWord.setWord(word);
|
||||||
Long modelId = schemaElement.getModel();
|
Long modelId = schemaElement.getModel();
|
||||||
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||||
+ DictWordType.DIMENSION.getType();
|
+ DictWordType.DIMENSION.getTypeWithSpilt();
|
||||||
if (isSuffix) {
|
if (isSuffix) {
|
||||||
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||||
+ DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType();
|
+ DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.DIMENSION.getTypeWithSpilt();
|
||||||
}
|
}
|
||||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||||
return dictWord;
|
return dictWord;
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ public class EntityWordBuilder extends BaseWordWithAliasBuilder {
|
|||||||
@Override
|
@Override
|
||||||
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
public DictWord getOneWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||||
String nature = DictWordType.NATURE_SPILT + schemaElement.getModel()
|
String nature = DictWordType.NATURE_SPILT + schemaElement.getModel()
|
||||||
+ DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getType();
|
+ DictWordType.NATURE_SPILT + schemaElement.getId() + DictWordType.ENTITY.getTypeWithSpilt();
|
||||||
DictWord dictWord = new DictWord();
|
DictWord dictWord = new DictWord();
|
||||||
dictWord.setWord(word);
|
dictWord.setWord(word);
|
||||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
|
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
|
||||||
|
|||||||
@@ -31,10 +31,10 @@ public class MetricWordBuilder extends BaseWordWithAliasBuilder {
|
|||||||
dictWord.setWord(word);
|
dictWord.setWord(word);
|
||||||
Long modelId = schemaElement.getModel();
|
Long modelId = schemaElement.getModel();
|
||||||
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||||
+ DictWordType.METRIC.getType();
|
+ DictWordType.METRIC.getTypeWithSpilt();
|
||||||
if (isSuffix) {
|
if (isSuffix) {
|
||||||
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||||
+ DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType();
|
+ DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.METRIC.getTypeWithSpilt();
|
||||||
}
|
}
|
||||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||||
return dictWord;
|
return dictWord;
|
||||||
|
|||||||
@@ -28,10 +28,10 @@ public class TagWordBuilder extends BaseWordWithAliasBuilder {
|
|||||||
dictWord.setWord(word);
|
dictWord.setWord(word);
|
||||||
Long modelId = schemaElement.getModel();
|
Long modelId = schemaElement.getModel();
|
||||||
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||||
+ DictWordType.TAG.getType();
|
+ DictWordType.TAG.getTypeWithSpilt();
|
||||||
if (isSuffix) {
|
if (isSuffix) {
|
||||||
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||||
+ DictWordType.SUFFIX.getType() + DictWordType.TAG.getType();
|
+ DictWordType.SUFFIX.getTypeWithSpilt() + DictWordType.TAG.getTypeWithSpilt();
|
||||||
}
|
}
|
||||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||||
return dictWord;
|
return dictWord;
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ public class WordBuilderFactory {
|
|||||||
static {
|
static {
|
||||||
wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder());
|
wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder());
|
||||||
wordNatures.put(DictWordType.METRIC, new MetricWordBuilder());
|
wordNatures.put(DictWordType.METRIC, new MetricWordBuilder());
|
||||||
wordNatures.put(DictWordType.VIEW, new ModelWordBuilder());
|
wordNatures.put(DictWordType.DATASET, new ModelWordBuilder());
|
||||||
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
|
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
|
||||||
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
|
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
|
||||||
wordNatures.put(DictWordType.TAG, new TagWordBuilder());
|
wordNatures.put(DictWordType.TAG, new TagWordBuilder());
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ public class NatureHelper {
|
|||||||
case ENTITY:
|
case ENTITY:
|
||||||
result = SchemaElementType.ENTITY;
|
result = SchemaElementType.ENTITY;
|
||||||
break;
|
break;
|
||||||
case VIEW:
|
case DATASET:
|
||||||
result = SchemaElementType.DATASET;
|
result = SchemaElementType.DATASET;
|
||||||
break;
|
break;
|
||||||
case VALUE:
|
case VALUE:
|
||||||
@@ -49,6 +49,9 @@ public class NatureHelper {
|
|||||||
case TAG:
|
case TAG:
|
||||||
result = SchemaElementType.TAG;
|
result = SchemaElementType.TAG;
|
||||||
break;
|
break;
|
||||||
|
case TAG_VALUE:
|
||||||
|
result = SchemaElementType.TAG_VALUE;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -57,7 +60,7 @@ public class NatureHelper {
|
|||||||
|
|
||||||
private static boolean isDataSetOrEntity(S2Term term, Integer model) {
|
private static boolean isDataSetOrEntity(S2Term term, Integer model) {
|
||||||
return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString()
|
return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString()
|
||||||
.endsWith(DictWordType.ENTITY.getType());
|
.endsWith(DictWordType.ENTITY.getTypeWithSpilt());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Integer getDataSetByNature(Nature nature) {
|
public static Integer getDataSetByNature(Nature nature) {
|
||||||
@@ -131,7 +134,8 @@ public class NatureHelper {
|
|||||||
if (split.length <= 1) {
|
if (split.length <= 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return !nature.endsWith(DictWordType.METRIC.getType()) && !nature.endsWith(DictWordType.DIMENSION.getType())
|
return !nature.endsWith(DictWordType.METRIC.getTypeWithSpilt()) && !nature.endsWith(
|
||||||
|
DictWordType.DIMENSION.getTypeWithSpilt())
|
||||||
&& StringUtils.isNumeric(split[1]);
|
&& StringUtils.isNumeric(split[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -154,12 +158,12 @@ public class NatureHelper {
|
|||||||
|
|
||||||
private static long getDimensionCount(List<S2Term> terms) {
|
private static long getDimensionCount(List<S2Term> terms) {
|
||||||
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
||||||
.endsWith(DictWordType.DIMENSION.getType())).count();
|
.endsWith(DictWordType.DIMENSION.getTypeWithSpilt())).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static long getMetricCount(List<S2Term> terms) {
|
private static long getMetricCount(List<S2Term> terms) {
|
||||||
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
||||||
.endsWith(DictWordType.METRIC.getType())).count();
|
.endsWith(DictWordType.METRIC.getTypeWithSpilt())).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ public class KnowledgeServiceImpl implements KnowledgeService {
|
|||||||
public void updateSemanticKnowledge(List<DictWord> natures) {
|
public void updateSemanticKnowledge(List<DictWord> natures) {
|
||||||
|
|
||||||
List<DictWord> prefixes = natures.stream()
|
List<DictWord> prefixes = natures.stream()
|
||||||
.filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
.filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
for (DictWord nature : prefixes) {
|
for (DictWord nature : prefixes) {
|
||||||
@@ -39,7 +39,7 @@ public class KnowledgeServiceImpl implements KnowledgeService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
List<DictWord> suffixes = natures.stream()
|
List<DictWord> suffixes = natures.stream()
|
||||||
.filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
.filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getTypeWithSpilt()))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
SearchService.loadSuffix(suffixes);
|
SearchService.loadSuffix(suffixes);
|
||||||
@@ -80,7 +80,7 @@ public class KnowledgeServiceImpl implements KnowledgeService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
|
public List<HanlpMapResult> prefixSearchByModel(String key, int limit,
|
||||||
Map<Long, List<Long>> modelIdToDataSetIds) {
|
Map<Long, List<Long>> modelIdToDataSetIds) {
|
||||||
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds);
|
return SearchService.prefixSearch(key, limit, modelIdToDataSetIds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user