mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-14 22:25:19 +00:00
(improvement)(headless) transfer term nature modelId to viewId before providing it to chat and put the modelId of metadata into the dict word instead of viewId (#739)
Co-authored-by: jolunoluo
This commit is contained in:
@@ -6,8 +6,15 @@ import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DimensionValueReq;
|
||||
import com.tencent.supersonic.headless.core.knowledge.helper.NatureHelper;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
@@ -17,11 +24,6 @@ import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DimensionValueReq;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Slf4j
|
||||
public class SearchService {
|
||||
|
||||
@@ -39,14 +41,14 @@ public class SearchService {
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
public static List<HanlpMapResult> prefixSearch(String key, int limit, Set<Long> detectModelIds) {
|
||||
return prefixSearch(key, limit, trie, detectModelIds);
|
||||
public static List<HanlpMapResult> prefixSearch(String key, int limit, Map<Long, List<Long>> modelIdToViewIds) {
|
||||
return prefixSearch(key, limit, trie, modelIdToViewIds);
|
||||
}
|
||||
|
||||
public static List<HanlpMapResult> prefixSearch(String key, int limit, BinTrie<List<String>> binTrie,
|
||||
Set<Long> detectModelIds) {
|
||||
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, detectModelIds);
|
||||
return result.stream().map(
|
||||
Map<Long, List<Long>> modelIdToViewIds) {
|
||||
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, modelIdToViewIds.keySet());
|
||||
List<HanlpMapResult> hanlpMapResults = result.stream().map(
|
||||
entry -> {
|
||||
String name = entry.getKey().replace("#", " ");
|
||||
return new HanlpMapResult(name, entry.getValue(), key);
|
||||
@@ -54,6 +56,13 @@ public class SearchService {
|
||||
).sorted((a, b) -> -(b.getName().length() - a.getName().length()))
|
||||
.limit(SEARCH_SIZE)
|
||||
.collect(Collectors.toList());
|
||||
for (HanlpMapResult hanlpMapResult : hanlpMapResults) {
|
||||
List<String> natures = hanlpMapResult.getNatures().stream()
|
||||
.map(nature -> NatureHelper.changeModel2View(nature, modelIdToViewIds))
|
||||
.flatMap(Collection::stream).collect(Collectors.toList());
|
||||
hanlpMapResult.setNatures(natures);
|
||||
}
|
||||
return hanlpMapResults;
|
||||
}
|
||||
|
||||
/***
|
||||
|
||||
@@ -38,11 +38,11 @@ public class DimensionWordBuilder extends BaseWordBuilder {
|
||||
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long viewId = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
Long modelId = schemaElement.getModel();
|
||||
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.DIMENSION.getType();
|
||||
if (isSuffix) {
|
||||
nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType();
|
||||
}
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
|
||||
@@ -27,8 +27,8 @@ public class EntityWordBuilder extends BaseWordBuilder {
|
||||
return result;
|
||||
}
|
||||
|
||||
Long view = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + view + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
Long modelId = schemaElement.getModel();
|
||||
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.ENTITY.getType();
|
||||
|
||||
if (!CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
|
||||
@@ -38,11 +38,11 @@ public class MetricWordBuilder extends BaseWordBuilder {
|
||||
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long viewId = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
Long modelId = schemaElement.getModel();
|
||||
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.METRIC.getType();
|
||||
if (isSuffix) {
|
||||
nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType();
|
||||
}
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
|
||||
@@ -27,8 +27,8 @@ public class ValueWordBuilder extends BaseWordBuilder {
|
||||
|
||||
schemaElement.getAlias().stream().forEach(value -> {
|
||||
DictWord dictWord = new DictWord();
|
||||
Long viewId = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId();
|
||||
Long modelId = schemaElement.getModel();
|
||||
String nature = DictWordType.NATURE_SPILT + modelId + DictWordType.NATURE_SPILT + schemaElement.getId();
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
dictWord.setWord(value);
|
||||
result.add(dictWord);
|
||||
|
||||
@@ -1,34 +1,36 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.helper;
|
||||
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.hankcs.hanlp.HanLP;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.seg.Segment;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.headless.core.knowledge.HadoopFileIOAdapter;
|
||||
import com.tencent.supersonic.headless.core.knowledge.MapResult;
|
||||
import com.tencent.supersonic.headless.core.knowledge.MultiCustomDictionary;
|
||||
import com.tencent.supersonic.headless.core.knowledge.SearchService;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.util.ResourceUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.util.ResourceUtils;
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
/**
|
||||
* HanLP helper
|
||||
@@ -212,18 +214,25 @@ public class HanlpHelper {
|
||||
}
|
||||
}
|
||||
|
||||
public static List<com.tencent.supersonic.headless.api.pojo.response.S2Term> getTerms(String text) {
|
||||
public static List<S2Term> getTerms(String text, Map<Long, List<Long>> modelIdToViewIds) {
|
||||
return getSegment().seg(text.toLowerCase()).stream()
|
||||
.filter(term -> term.getNature().startsWith(DictWordType.NATURE_SPILT))
|
||||
.map(term -> transform2ApiTerm(term))
|
||||
.map(term -> transform2ApiTerm(term, modelIdToViewIds))
|
||||
.flatMap(Collection::stream)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static S2Term transform2ApiTerm(Term term) {
|
||||
S2Term knowledgeTerm = new S2Term();
|
||||
BeanUtils.copyProperties(term, knowledgeTerm);
|
||||
knowledgeTerm.setFrequency(term.getFrequency());
|
||||
return knowledgeTerm;
|
||||
public static List<S2Term> transform2ApiTerm(Term term, Map<Long, List<Long>> modelIdToViewIds) {
|
||||
List<S2Term> s2Terms = Lists.newArrayList();
|
||||
List<String> natures = NatureHelper.changeModel2View(String.valueOf(term.getNature()), modelIdToViewIds);
|
||||
for (String nature : natures) {
|
||||
S2Term s2Term = new S2Term();
|
||||
BeanUtils.copyProperties(term, s2Term);
|
||||
s2Term.setNature(Nature.create(nature));
|
||||
s2Term.setFrequency(term.getFrequency());
|
||||
s2Terms.add(s2Term);
|
||||
}
|
||||
return s2Terms;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.helper;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.core.knowledge.ViewInfoStat;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
@@ -81,6 +83,43 @@ public class NatureHelper {
|
||||
return null;
|
||||
}
|
||||
|
||||
private static Long getModelId(String nature) {
|
||||
try {
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length <= 1) {
|
||||
return null;
|
||||
}
|
||||
return Long.valueOf(split[1]);
|
||||
} catch (NumberFormatException e) {
|
||||
log.error("", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static Nature changeModel2View(String nature, Long viewId) {
|
||||
try {
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length <= 1) {
|
||||
return null;
|
||||
}
|
||||
split[1] = String.valueOf(viewId);
|
||||
return Nature.create(StringUtils.join(split, DictWordType.NATURE_SPILT));
|
||||
} catch (NumberFormatException e) {
|
||||
log.error("", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static List<String> changeModel2View(String nature, Map<Long, List<Long>> modelIdToViewIds) {
|
||||
Long modelId = getModelId(nature);
|
||||
List<Long> viewIds = modelIdToViewIds.get(modelId);
|
||||
if (CollectionUtils.isEmpty(viewIds)) {
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
return viewIds.stream().map(viewId -> String.valueOf(changeModel2View(nature, viewId)))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static boolean isDimensionValueViewId(String nature) {
|
||||
if (StringUtils.isEmpty(nature)) {
|
||||
return false;
|
||||
|
||||
Reference in New Issue
Block a user