mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-11 03:58:14 +00:00
[improvement][project] supersonic 0.7.0 version backend update (#20)
Co-authored-by: kanedai <kanedai@tencent.com>
This commit is contained in:
@@ -102,6 +102,16 @@
|
||||
<artifactId>auth-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.tencent.supersonic</groupId>
|
||||
<artifactId>chat-api</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.tencent.supersonic</groupId>
|
||||
<artifactId>semantic-query</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
*/
|
||||
protected V value;
|
||||
|
||||
public String prefix = null;
|
||||
protected String prefix = null;
|
||||
|
||||
public BaseNode<V> transition(String path, int begin) {
|
||||
BaseNode<V> cur = this;
|
||||
|
||||
@@ -3,7 +3,7 @@ package com.hankcs.hanlp.seg.common;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.CustomDictionary;
|
||||
import com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
import lombok.Data;
|
||||
import lombok.ToString;
|
||||
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
package com.tencent.supersonic.knowledge;
|
||||
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import com.tencent.supersonic.knowledge.service.KnowledgeService;
|
||||
import com.tencent.supersonic.knowledge.service.WordService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.context.event.ApplicationStartedEvent;
|
||||
import org.springframework.context.ApplicationListener;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
public class ApplicationStartedInit implements ApplicationListener<ApplicationStartedEvent> {
|
||||
|
||||
@Autowired
|
||||
private KnowledgeService knowledgeService;
|
||||
@Autowired
|
||||
private WordService wordService;
|
||||
@Autowired
|
||||
private SchemaService schemaService;
|
||||
|
||||
@Override
|
||||
public void onApplicationEvent(ApplicationStartedEvent event) {
|
||||
try {
|
||||
log.debug("ApplicationStartedInit start");
|
||||
|
||||
List<DictWord> dictWords = wordService.getAllDictWords();
|
||||
wordService.setPreDictWords(dictWords);
|
||||
knowledgeService.reloadAllData(dictWords);
|
||||
|
||||
log.debug("ApplicationStartedInit end");
|
||||
} catch (Exception e) {
|
||||
log.error("ApplicationStartedInit error", e);
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
* reload knowledge task
|
||||
*/
|
||||
@Scheduled(cron = "${reload.knowledge.corn:0 0/1 * * * ?}")
|
||||
public void reloadKnowledge() {
|
||||
log.debug("reloadKnowledge start");
|
||||
|
||||
try {
|
||||
List<DictWord> dictWords = wordService.getAllDictWords();
|
||||
List<DictWord> preDictWords = wordService.getPreDictWords();
|
||||
|
||||
if (CollectionUtils.isEqualCollection(dictWords, preDictWords)) {
|
||||
log.debug("dictWords has not changed, reloadKnowledge end");
|
||||
return;
|
||||
}
|
||||
log.info("dictWords has changed");
|
||||
wordService.setPreDictWords(dictWords);
|
||||
knowledgeService.updateOnlineKnowledge(wordService.getAllDictWords());
|
||||
schemaService.getCache().refresh(SchemaService.ALL_CACHE);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("reloadKnowledge error", e);
|
||||
}
|
||||
|
||||
log.debug("reloadKnowledge end");
|
||||
}
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
import com.tencent.supersonic.common.nlp.ItemDO;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* base word nature
|
||||
*/
|
||||
@Slf4j
|
||||
public abstract class BaseWordNature {
|
||||
|
||||
/**
|
||||
* 获取所有wordNature
|
||||
*
|
||||
* @param itemDOS
|
||||
* @return
|
||||
*/
|
||||
public List<WordNature> getWordNatureList(List<ItemDO> itemDOS) {
|
||||
List<WordNature> wordNatures = new ArrayList<>();
|
||||
try {
|
||||
wordNatures = getWordNaturesWithException(itemDOS);
|
||||
} catch (Exception e) {
|
||||
log.error("getWordNatureList error,", e);
|
||||
}
|
||||
return wordNatures;
|
||||
}
|
||||
|
||||
public List<WordNature> getWordNaturesWithException(List<ItemDO> itemDOS) {
|
||||
|
||||
List<WordNature> wordNatures = new ArrayList<>();
|
||||
|
||||
for (ItemDO itemDO : itemDOS) {
|
||||
wordNatures.addAll(getWordNature(itemDO.getName(), itemDO));
|
||||
}
|
||||
return wordNatures;
|
||||
}
|
||||
|
||||
public abstract List<WordNature> getWordNature(String word, ItemDO itemDO);
|
||||
|
||||
public Integer getElementID(String nature) {
|
||||
String[] split = nature.split(NatureType.NATURE_SPILT);
|
||||
if (split.length >= 3) {
|
||||
return Integer.valueOf(split[2]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.nlp.ItemDO;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import java.util.List;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* dimension word nature
|
||||
*/
|
||||
@Service
|
||||
public class DimensionWordNature extends BaseWordNature {
|
||||
|
||||
@Value("${nlp.dimension.use.suffix:true}")
|
||||
private boolean nlpDimensionUseSuffix = true;
|
||||
|
||||
|
||||
@Override
|
||||
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
|
||||
List<WordNature> result = Lists.newArrayList();
|
||||
result.add(getOnwWordNature(word, itemDO, false));
|
||||
if (nlpDimensionUseSuffix) {
|
||||
String reverseWord = StringUtils.reverse(word);
|
||||
if (StringUtils.isNotEmpty(word) && !word.equalsIgnoreCase(reverseWord)) {
|
||||
result.add(getOnwWordNature(reverseWord, itemDO, true));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private WordNature getOnwWordNature(String word, ItemDO itemDO, boolean isSuffix) {
|
||||
WordNature wordNature = new WordNature();
|
||||
wordNature.setWord(word);
|
||||
Integer domainId = itemDO.getDomain();
|
||||
String nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
|
||||
+ NatureType.DIMENSION.getType();
|
||||
if (isSuffix) {
|
||||
nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
|
||||
+ NatureType.SUFFIX.getType() + NatureType.DIMENSION.getType();
|
||||
}
|
||||
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
|
||||
return wordNature;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.nlp.ItemDO;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* domain word nature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class DomainWordNature extends BaseWordNature {
|
||||
|
||||
@Override
|
||||
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
|
||||
List<WordNature> result = Lists.newArrayList();
|
||||
WordNature wordNature = new WordNature();
|
||||
wordNature.setWord(word);
|
||||
Integer domainId = itemDO.getDomain();
|
||||
String nature = NatureType.NATURE_SPILT + domainId;
|
||||
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
|
||||
result.add(wordNature);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.nlp.ItemDO;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* dimension value wordNature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class EntityWordNature extends BaseWordNature {
|
||||
|
||||
@Override
|
||||
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
|
||||
List<WordNature> result = Lists.newArrayList();
|
||||
WordNature wordNature = new WordNature();
|
||||
wordNature.setWord(word);
|
||||
Integer domain = itemDO.getDomain();
|
||||
String nature = NatureType.NATURE_SPILT + domain + NatureType.NATURE_SPILT + itemDO.getItemId()
|
||||
+ NatureType.ENTITY.getType();
|
||||
wordNature.setNatureWithFrequency(String.format("%s 200000", nature));
|
||||
result.add(wordNature);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,48 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.nlp.ItemDO;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import java.util.List;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* Metric WordNature
|
||||
*/
|
||||
@Service
|
||||
public class MetricWordNature extends BaseWordNature {
|
||||
|
||||
@Value("${nlp.metric.use.suffix:true}")
|
||||
private boolean nlpMetricUseSuffix = true;
|
||||
|
||||
@Override
|
||||
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
|
||||
List<WordNature> result = Lists.newArrayList();
|
||||
result.add(getOnwWordNature(word, itemDO, false));
|
||||
if (nlpMetricUseSuffix) {
|
||||
String reverseWord = StringUtils.reverse(word);
|
||||
if (!word.equalsIgnoreCase(reverseWord)) {
|
||||
result.add(getOnwWordNature(reverseWord, itemDO, true));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private WordNature getOnwWordNature(String word, ItemDO itemDO, boolean isSuffix) {
|
||||
WordNature wordNature = new WordNature();
|
||||
wordNature.setWord(word);
|
||||
Integer domainId = itemDO.getDomain();
|
||||
String nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
|
||||
+ NatureType.METRIC.getType();
|
||||
if (isSuffix) {
|
||||
nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
|
||||
+ NatureType.SUFFIX.getType() + NatureType.METRIC.getType();
|
||||
}
|
||||
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
|
||||
return wordNature;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,60 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import com.tencent.supersonic.knowledge.domain.service.OnlineKnowledgeService;
|
||||
import com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper;
|
||||
import com.tencent.supersonic.knowledge.infrastructure.nlp.Suggester;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* online knowledge service impl
|
||||
*/
|
||||
@Service
|
||||
public class OnlineKnowledgeServiceImpl implements OnlineKnowledgeService {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(OnlineKnowledgeServiceImpl.class);
|
||||
|
||||
public void updateSemanticKnowledge(List<WordNature> natures) {
|
||||
|
||||
List<WordNature> prefixes = natures.stream()
|
||||
.filter(entry -> !entry.getNatureWithFrequency().contains(NatureType.SUFFIX.getType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
for (WordNature nature : prefixes) {
|
||||
HanlpHelper.addToCustomDictionary(nature);
|
||||
}
|
||||
|
||||
List<WordNature> suffixes = natures.stream()
|
||||
.filter(entry -> entry.getNatureWithFrequency().contains(NatureType.SUFFIX.getType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Suggester.loadSuffix(suffixes);
|
||||
}
|
||||
|
||||
|
||||
public void reloadAllData(List<WordNature> natures) {
|
||||
// 1. reload custom knowledge
|
||||
try {
|
||||
HanlpHelper.reloadCustomDictionary();
|
||||
} catch (Exception e) {
|
||||
logger.error("reloadCustomDictionary error", e);
|
||||
}
|
||||
|
||||
// 2. update online knowledge
|
||||
updateOnlineKnowledge(natures);
|
||||
}
|
||||
|
||||
public void updateOnlineKnowledge(List<WordNature> natures) {
|
||||
try {
|
||||
updateSemanticKnowledge(natures);
|
||||
} catch (Exception e) {
|
||||
logger.error("updateSemanticKnowledge error", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.common.nlp.ItemDO;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* dimension value wordNature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class ValueWordNature extends BaseWordNature {
|
||||
|
||||
@Override
|
||||
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
|
||||
List<WordNature> result = Lists.newArrayList();
|
||||
WordNature wordNature = new WordNature();
|
||||
wordNature.setWord(word);
|
||||
Integer domain = itemDO.getDomain();
|
||||
String nature = NatureType.NATURE_SPILT + domain + NatureType.NATURE_SPILT + itemDO.getItemId();
|
||||
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
|
||||
result.add(wordNature);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.application.online;
|
||||
|
||||
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* WordNature Strategy Factory
|
||||
*/
|
||||
public class WordNatureStrategyFactory {
|
||||
|
||||
private static Map<NatureType, BaseWordNature> strategyFactory = new ConcurrentHashMap<>();
|
||||
|
||||
static {
|
||||
strategyFactory.put(NatureType.DIMENSION, new DimensionWordNature());
|
||||
strategyFactory.put(NatureType.METRIC, new MetricWordNature());
|
||||
strategyFactory.put(NatureType.DOMAIN, new DomainWordNature());
|
||||
strategyFactory.put(NatureType.ENTITY, new EntityWordNature());
|
||||
strategyFactory.put(NatureType.VALUE, new ValueWordNature());
|
||||
|
||||
|
||||
}
|
||||
|
||||
public static BaseWordNature get(NatureType strategyType) {
|
||||
return strategyFactory.get(strategyType);
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.tencent.supersonic.knowledge.domain.pojo;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.domain.pojo;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
|
||||
public class DictTaskFilter {
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.domain.pojo;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
public enum DictUpdateMode {
|
||||
|
||||
@@ -0,0 +1,34 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import java.util.Objects;
|
||||
import lombok.Data;
|
||||
import lombok.ToString;
|
||||
|
||||
/***
|
||||
* word nature
|
||||
*/
|
||||
@Data
|
||||
@ToString
|
||||
public class DictWord {
|
||||
|
||||
private String word;
|
||||
private String nature;
|
||||
private String natureWithFrequency;
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
DictWord that = (DictWord) o;
|
||||
return Objects.equals(word, that.word) && Objects.equals(natureWithFrequency, that.natureWithFrequency);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(word, natureWithFrequency);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/***
|
||||
* nature type
|
||||
* such as : metric、dimension etc.
|
||||
*/
|
||||
public enum DictWordType {
|
||||
METRIC("metric"),
|
||||
DIMENSION("dimension"),
|
||||
VALUE("value"),
|
||||
|
||||
DOMAIN("dm"),
|
||||
ENTITY("entity"),
|
||||
|
||||
NUMBER("m"),
|
||||
|
||||
SUFFIX("suffix");
|
||||
|
||||
public static final String NATURE_SPILT = "_";
|
||||
public static final String SPACE = " ";
|
||||
private String type;
|
||||
|
||||
DictWordType(String type) {
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
public String getType() {
|
||||
return NATURE_SPILT + type;
|
||||
}
|
||||
|
||||
|
||||
|
||||
public static DictWordType getNatureType(String nature) {
|
||||
if (StringUtils.isEmpty(nature) || !nature.startsWith(NATURE_SPILT)) {
|
||||
return null;
|
||||
}
|
||||
for (DictWordType dictWordType : values()) {
|
||||
if (nature.endsWith(dictWordType.getType())) {
|
||||
return dictWordType;
|
||||
}
|
||||
}
|
||||
//domain
|
||||
String[] natures = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (natures.length == 2 && StringUtils.isNumeric(natures[1])) {
|
||||
return DOMAIN;
|
||||
}
|
||||
//dimension value
|
||||
if (natures.length == 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) {
|
||||
return VALUE;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.nlp;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
@@ -1,9 +1,11 @@
|
||||
package com.tencent.supersonic.knowledge.domain.pojo;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictUpdateMode;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.tencent.supersonic.knowledge.domain.pojo;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
|
||||
import com.tencent.supersonic.common.enums.TaskStatusEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
|
||||
import java.util.Date;
|
||||
import lombok.Data;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.tencent.supersonic.knowledge.domain.pojo;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
|
||||
import com.tencent.supersonic.common.enums.TypeEnums;
|
||||
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
|
||||
import java.util.List;
|
||||
import javax.validation.constraints.NotNull;
|
||||
|
||||
@@ -23,4 +23,4 @@ public class DimValueInfo {
|
||||
private List<String> whiteList;
|
||||
private List<String> ruleList;
|
||||
private Boolean isDictInfo;
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.domain;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -12,15 +12,6 @@ public interface FileHandler {
|
||||
*/
|
||||
void backupFile(String fileName);
|
||||
|
||||
/**
|
||||
* move files to a specific directory
|
||||
* not backup
|
||||
*
|
||||
* @param fileName
|
||||
* @param targetDirectory
|
||||
*/
|
||||
void moveFile(String fileName, String targetDirectory);
|
||||
|
||||
/**
|
||||
* create a directory
|
||||
*
|
||||
@@ -1,23 +1,22 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.nlp;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import com.hankcs.hanlp.corpus.io.IIOAdapter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@Slf4j
|
||||
public class HadoopFileIOAdapter implements IIOAdapter {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(HadoopFileIOAdapter.class);
|
||||
|
||||
@Override
|
||||
public InputStream open(String path) throws IOException {
|
||||
LOGGER.info("open:{}", path);
|
||||
log.info("open:{}", path);
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.get(URI.create(path), conf);
|
||||
return fs.open(new Path(path));
|
||||
@@ -25,7 +24,7 @@ public class HadoopFileIOAdapter implements IIOAdapter {
|
||||
|
||||
@Override
|
||||
public OutputStream create(String path) throws IOException {
|
||||
LOGGER.info("create:{}", path);
|
||||
log.info("create:{}", path);
|
||||
Configuration conf = new Configuration();
|
||||
FileSystem fs = FileSystem.get(URI.create(path), conf);
|
||||
return fs.create(new Path(path));
|
||||
@@ -1,6 +1,6 @@
|
||||
package com.tencent.supersonic.knowledge.domain;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
import java.io.FileNotFoundException;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.domain;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
@@ -44,18 +44,6 @@ public class LocalFileHandler implements FileHandler {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void moveFile(String filePath, String targetDirectoryPath) {
|
||||
Path sourcePath = Paths.get(filePath);
|
||||
Path targetPath = Paths.get(targetDirectoryPath, sourcePath.getFileName().toString());
|
||||
try {
|
||||
Files.move(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
|
||||
log.info("File moved successfully!");
|
||||
} catch (IOException e) {
|
||||
log.info("Failed to move file: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createDir(String directoryPath) {
|
||||
Path path = Paths.get(directoryPath);
|
||||
@@ -136,4 +124,4 @@ public class LocalFileHandler implements FileHandler {
|
||||
}
|
||||
return Files.newBufferedWriter(Paths.get(filePath), StandardCharsets.UTF_8);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import lombok.Data;
|
||||
import lombok.ToString;
|
||||
|
||||
@Data
|
||||
@ToString
|
||||
public class MapResult implements Serializable {
|
||||
|
||||
private String name;
|
||||
private List<String> natures;
|
||||
private int offset = 0;
|
||||
|
||||
private double similarity;
|
||||
|
||||
private String detectWord;
|
||||
|
||||
public MapResult() {
|
||||
|
||||
}
|
||||
|
||||
public MapResult(String name, List<String> natures, String detectWord) {
|
||||
this.name = name;
|
||||
this.natures = natures;
|
||||
this.detectWord = detectWord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
MapResult that = (MapResult) o;
|
||||
return Objects.equals(name, that.name) && Objects.equals(natures, that.natures);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(name, natures);
|
||||
}
|
||||
|
||||
public void setOffset(int offset) {
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.nlp;
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import static com.hankcs.hanlp.utility.Predefine.logger;
|
||||
|
||||
@@ -14,6 +14,8 @@ import com.hankcs.hanlp.dictionary.other.CharTable;
|
||||
import com.hankcs.hanlp.utility.LexiconUtility;
|
||||
import com.hankcs.hanlp.utility.Predefine;
|
||||
import com.hankcs.hanlp.utility.TextUtility;
|
||||
import com.tencent.supersonic.knowledge.service.SearchService;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
@@ -109,13 +111,13 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
attribute = DictionaryAttributeUtil.getAttribute(map.get(word), attribute);
|
||||
map.put(word, attribute);
|
||||
if (addToSuggeterTrie) {
|
||||
Suggester.put(word, attribute);
|
||||
SearchService.put(word, attribute);
|
||||
}
|
||||
|
||||
} else {
|
||||
map.put(word, attribute);
|
||||
if (addToSuggeterTrie) {
|
||||
Suggester.put(word, attribute);
|
||||
SearchService.put(word, attribute);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -125,6 +127,20 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
}
|
||||
}
|
||||
|
||||
public boolean load(String... path) {
|
||||
this.path = path;
|
||||
long start = System.currentTimeMillis();
|
||||
if (!this.loadMainDictionary(path[0])) {
|
||||
Predefine.logger.warning("自定义词典" + Arrays.toString(path) + "加载失败");
|
||||
return false;
|
||||
} else {
|
||||
Predefine.logger.info(
|
||||
"自定义词典加载成功:" + this.dat.size() + "个词条,耗时" + (System.currentTimeMillis() - start) + "ms");
|
||||
this.path = path;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
* load main dictionary
|
||||
* @param mainPath
|
||||
@@ -176,7 +192,7 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
logger.info("正在构建DoubleArrayTrie……");
|
||||
dat.build(map);
|
||||
if (addToSuggestTrie) {
|
||||
// Suggester.save();
|
||||
// SearchService.save();
|
||||
}
|
||||
if (isCache) {
|
||||
// 缓存成dat文件,下次加载会快很多
|
||||
@@ -219,6 +235,10 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
}
|
||||
}
|
||||
|
||||
public boolean loadMainDictionary(String mainPath) {
|
||||
return loadMainDictionary(mainPath, this.path, this.dat, true, addToSuggesterTrie);
|
||||
}
|
||||
|
||||
public static boolean loadDat(String path, DoubleArrayTrie<CoreDictionary.Attribute> dat) {
|
||||
return loadDat(path, HanLP.Config.CustomDictionaryPath, dat);
|
||||
}
|
||||
@@ -303,24 +323,6 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
return word;
|
||||
}
|
||||
|
||||
public boolean load(String... path) {
|
||||
this.path = path;
|
||||
long start = System.currentTimeMillis();
|
||||
if (!this.loadMainDictionary(path[0])) {
|
||||
Predefine.logger.warning("自定义词典" + Arrays.toString(path) + "加载失败");
|
||||
return false;
|
||||
} else {
|
||||
Predefine.logger.info(
|
||||
"自定义词典加载成功:" + this.dat.size() + "个词条,耗时" + (System.currentTimeMillis() - start) + "ms");
|
||||
this.path = path;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean loadMainDictionary(String mainPath) {
|
||||
return loadMainDictionary(mainPath, this.path, this.dat, true, addToSuggesterTrie);
|
||||
}
|
||||
|
||||
public boolean reload() {
|
||||
if (this.path != null && this.path.length != 0) {
|
||||
IOUtil.deleteFile(this.path[0] + ".bin");
|
||||
@@ -368,7 +370,7 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
// return true;
|
||||
}
|
||||
if (addToSuggesterTrie) {
|
||||
Suggester.put(word, att);
|
||||
SearchService.put(word, att);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary.builder;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* base word nature
|
||||
*/
|
||||
@Slf4j
|
||||
public abstract class BaseWordBuilder {
|
||||
|
||||
public static final Long DEFAULT_FREQUENCY = 100000L;
|
||||
|
||||
public List<DictWord> getDictWords(List<SchemaElement> schemaElements) {
|
||||
List<DictWord> dictWords = new ArrayList<>();
|
||||
try {
|
||||
dictWords = getDictWordsWithException(schemaElements);
|
||||
} catch (Exception e) {
|
||||
log.error("getWordNatureList error,", e);
|
||||
}
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
protected List<DictWord> getDictWordsWithException(List<SchemaElement> schemaElements) {
|
||||
|
||||
List<DictWord> dictWords = new ArrayList<>();
|
||||
|
||||
for (SchemaElement schemaElement : schemaElements) {
|
||||
dictWords.addAll(doGet(schemaElement.getName(), schemaElement));
|
||||
}
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
protected abstract List<DictWord> doGet(String word, SchemaElement schemaElement);
|
||||
|
||||
public Long getElementID(String nature) {
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length >= 3) {
|
||||
return Long.valueOf(split[2]);
|
||||
}
|
||||
return 0L;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* dimension word nature
|
||||
*/
|
||||
@Service
|
||||
public class DimensionWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Value("${nlp.dimension.use.suffix:true}")
|
||||
private boolean nlpDimensionUseSuffix = true;
|
||||
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
result.add(getOnwWordNature(word, schemaElement, false));
|
||||
if (nlpDimensionUseSuffix) {
|
||||
String reverseWord = StringUtils.reverse(word);
|
||||
if (StringUtils.isNotEmpty(word) && !word.equalsIgnoreCase(reverseWord)) {
|
||||
result.add(getOnwWordNature(reverseWord, schemaElement, true));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long domainId = schemaElement.getDomain();
|
||||
String nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.DIMENSION.getType();
|
||||
if (isSuffix) {
|
||||
nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType();
|
||||
}
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
return dictWord;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* domain word nature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class DomainWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long domainId = schemaElement.getDomain();
|
||||
String nature = DictWordType.NATURE_SPILT + domainId;
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
result.add(dictWord);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* dimension value wordNature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class EntityWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long domain = schemaElement.getDomain();
|
||||
String nature = DictWordType.NATURE_SPILT + domain + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.ENTITY.getType();
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
|
||||
result.add(dictWord);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* Metric DictWord
|
||||
*/
|
||||
@Service
|
||||
public class MetricWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Value("${nlp.metric.use.suffix:true}")
|
||||
private boolean nlpMetricUseSuffix = true;
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
result.add(getOnwWordNature(word, schemaElement, false));
|
||||
if (nlpMetricUseSuffix) {
|
||||
String reverseWord = StringUtils.reverse(word);
|
||||
if (!word.equalsIgnoreCase(reverseWord)) {
|
||||
result.add(getOnwWordNature(reverseWord, schemaElement, true));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long domainId = schemaElement.getDomain();
|
||||
String nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.METRIC.getType();
|
||||
if (isSuffix) {
|
||||
nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType();
|
||||
}
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
return dictWord;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
|
||||
* dimension value wordNature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class ValueWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
if (Objects.nonNull(schemaElement) && !CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
|
||||
schemaElement.getAlias().stream().forEach(value -> {
|
||||
DictWord dictWord = new DictWord();
|
||||
Long domainId = schemaElement.getDomain();
|
||||
String nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId();
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
dictWord.setWord(value);
|
||||
result.add(dictWord);
|
||||
});
|
||||
}
|
||||
log.debug("ValueWordBuilder, result:{}", result);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary.builder;
|
||||
|
||||
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import com.tencent.supersonic.knowledge.dictionary.builder.*;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* DictWord Strategy Factory
|
||||
*/
|
||||
public class WordBuilderFactory {
|
||||
|
||||
private static Map<DictWordType, BaseWordBuilder> wordNatures = new ConcurrentHashMap<>();
|
||||
|
||||
static {
|
||||
wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder());
|
||||
wordNatures.put(DictWordType.METRIC, new MetricWordBuilder());
|
||||
wordNatures.put(DictWordType.DOMAIN, new DomainWordBuilder());
|
||||
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
|
||||
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
|
||||
}
|
||||
|
||||
public static BaseWordBuilder get(DictWordType strategyType) {
|
||||
return wordNatures.get(strategyType);
|
||||
}
|
||||
}
|
||||
@@ -1,59 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.domain.converter;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.enums.TaskStatusEnum;
|
||||
import com.tencent.supersonic.common.util.json.JsonUtil;
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DictConfig;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DimValue2DictCommand;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DimValueInfo;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
public class DictTaskConverter {
|
||||
|
||||
private static String dateTimeFormatter = "yyyyMMddHHmmss";
|
||||
|
||||
public static DimValueDictTaskPO generateDimValueDictTaskPO(DimValue2DictCommand dimValue2DictCommend, User user) {
|
||||
DimValueDictTaskPO taskPO = new DimValueDictTaskPO();
|
||||
Date createAt = new Date();
|
||||
String date = DateTimeFormatter.ofPattern(dateTimeFormatter)
|
||||
.format(createAt.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime());
|
||||
String creator = Strings.isNullOrEmpty(user.getName()) ? "" : user.getName();
|
||||
String updateMode = dimValue2DictCommend.getUpdateMode().getValue();
|
||||
String name = String.format("DimValue_dic_%s_%s_%s", updateMode, creator, date);
|
||||
taskPO.setName(name);
|
||||
|
||||
taskPO.setCreatedAt(createAt);
|
||||
taskPO.setCommand(JsonUtil.toString(dimValue2DictCommend));
|
||||
taskPO.setStatus(TaskStatusEnum.RUNNING.getCode());
|
||||
taskPO.setCreatedBy(creator);
|
||||
|
||||
return taskPO;
|
||||
}
|
||||
|
||||
public static DictConfPO generateDictConfPO(DictConfig dictConfig, User user) {
|
||||
DictConfPO dictConfPO = new DictConfPO();
|
||||
dictConfPO.setDimValueInfos(JsonUtil.toString(dictConfig.getDimValueInfoList()));
|
||||
dictConfPO.setDomainId(dictConfig.getDomainId());
|
||||
|
||||
dictConfPO.setCreatedBy(user.getName());
|
||||
dictConfPO.setUpdatedBy(user.getName());
|
||||
dictConfPO.setCreatedAt(new Date());
|
||||
dictConfPO.setUpdatedAt(new Date());
|
||||
|
||||
return dictConfPO;
|
||||
}
|
||||
|
||||
public static DictConfig dictConfPO2Config(DictConfPO dictConfPO) {
|
||||
DictConfig dictConfig = new DictConfig();
|
||||
dictConfig.setDomainId(dictConfPO.getDomainId());
|
||||
List<DimValueInfo> dimValueInfos = JsonUtil.toList(dictConfPO.getDimValueInfos(), DimValueInfo.class);
|
||||
dictConfig.setDimValueInfoList(dimValueInfos);
|
||||
return dictConfig;
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.domain.repository;
|
||||
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DictConfig;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DictTaskFilter;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DimValueDictInfo;
|
||||
import java.util.List;
|
||||
|
||||
public interface DictRepository {
|
||||
|
||||
Long createDimValueDictTask(DimValueDictTaskPO dimValueDictTaskPO);
|
||||
|
||||
Boolean updateDictTaskStatus(Integer status, DimValueDictTaskPO dimValueDictTaskPO);
|
||||
|
||||
List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter);
|
||||
|
||||
Boolean createDictConf(DictConfPO dictConfPO);
|
||||
|
||||
Boolean editDictConf(DictConfPO dictConfPO);
|
||||
|
||||
Boolean upsertDictInfo(DictConfPO dictConfPO);
|
||||
|
||||
DictConfig getDictInfoByDomainId(Long domainId);
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.domain.service;
|
||||
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* online knowledge service interface
|
||||
*/
|
||||
public interface OnlineKnowledgeService {
|
||||
|
||||
void updateSemanticKnowledge(List<WordNature> natures);
|
||||
|
||||
void reloadAllData(List<WordNature> natures);
|
||||
|
||||
void updateOnlineKnowledge(List<WordNature> natures);
|
||||
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.custom;
|
||||
|
||||
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
@Mapper
|
||||
public interface DictConfMapper {
|
||||
|
||||
Boolean createDictConf(DictConfPO dictConfPO);
|
||||
|
||||
Boolean editDictConf(DictConfPO dictConfPO);
|
||||
|
||||
Boolean upsertDictInfo(DictConfPO dictConfPO);
|
||||
|
||||
DictConfPO getDictInfoByDomainId(Long domainId);
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.custom;
|
||||
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DictTaskFilter;
|
||||
import java.util.List;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
@Mapper
|
||||
public interface DictTaskMapper {
|
||||
|
||||
Long createDimValueTask(DimValueDictTaskPO dimValueDictTaskPO);
|
||||
|
||||
Boolean updateTaskStatus(DimValueDictTaskPO dimValueDictTaskPO);
|
||||
|
||||
List<DimValueDictTaskPO> searchDictTaskList(DictTaskFilter filter);
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.repository;
|
||||
|
||||
import com.tencent.supersonic.common.enums.TaskStatusEnum;
|
||||
import com.tencent.supersonic.knowledge.domain.converter.DictTaskConverter;
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
|
||||
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DictConfig;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DictTaskFilter;
|
||||
import com.tencent.supersonic.knowledge.domain.pojo.DimValueDictInfo;
|
||||
import com.tencent.supersonic.knowledge.domain.repository.DictRepository;
|
||||
import com.tencent.supersonic.knowledge.infrastructure.custom.DictConfMapper;
|
||||
import com.tencent.supersonic.knowledge.infrastructure.custom.DictTaskMapper;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Repository;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
|
||||
@Repository
|
||||
public class DictRepositoryImpl implements DictRepository {
|
||||
|
||||
private final DictTaskMapper dictTaskMapper;
|
||||
private final DictConfMapper dictConfMapper;
|
||||
|
||||
public DictRepositoryImpl(DictTaskMapper dictTaskMapper,
|
||||
DictConfMapper dictConfMapper) {
|
||||
this.dictTaskMapper = dictTaskMapper;
|
||||
this.dictConfMapper = dictConfMapper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long createDimValueDictTask(DimValueDictTaskPO dimValueDictTaskPO) {
|
||||
dictTaskMapper.createDimValueTask(dimValueDictTaskPO);
|
||||
return dimValueDictTaskPO.getId();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Boolean updateDictTaskStatus(Integer status, DimValueDictTaskPO dimValueDictTaskPO) {
|
||||
dimValueDictTaskPO.setStatus(status);
|
||||
Date createdAt = dimValueDictTaskPO.getCreatedAt();
|
||||
long elapsedMs = System.currentTimeMillis() - createdAt.getTime();
|
||||
dimValueDictTaskPO.setElapsedMs(elapsedMs);
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
dictTaskMapper.updateTaskStatus(dimValueDictTaskPO);
|
||||
return null;
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter) {
|
||||
List<DimValueDictInfo> dimValueDictDescList = new ArrayList<>();
|
||||
List<DimValueDictTaskPO> dimValueDictTaskPOList = dictTaskMapper.searchDictTaskList(filter);
|
||||
if (!CollectionUtils.isEmpty(dimValueDictTaskPOList)) {
|
||||
dimValueDictTaskPOList.stream().forEach(dictTaskPO -> {
|
||||
DimValueDictInfo dimValueDictDesc = new DimValueDictInfo();
|
||||
BeanUtils.copyProperties(dictTaskPO, dimValueDictDesc);
|
||||
dimValueDictDesc.setStatus(TaskStatusEnum.of(dictTaskPO.getStatus()));
|
||||
dimValueDictDescList.add(dimValueDictDesc);
|
||||
});
|
||||
}
|
||||
return dimValueDictDescList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean createDictConf(DictConfPO dictConfPO) {
|
||||
return dictConfMapper.createDictConf(dictConfPO);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean editDictConf(DictConfPO dictConfPO) {
|
||||
return dictConfMapper.editDictConf(dictConfPO);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean upsertDictInfo(DictConfPO dictConfPO) {
|
||||
return dictConfMapper.upsertDictInfo(dictConfPO);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictConfig getDictInfoByDomainId(Long domainId) {
|
||||
DictConfPO dictConfPO = dictConfMapper.getDictInfoByDomainId(domainId);
|
||||
if (Objects.isNull(dictConfPO)) {
|
||||
return null;
|
||||
}
|
||||
return DictTaskConverter.dictConfPO2Config(dictConfPO);
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,10 @@
|
||||
package com.tencent.supersonic.knowledge.domain.dataobject;
|
||||
package com.tencent.supersonic.knowledge.persistence.dataobject;
|
||||
|
||||
import java.util.Date;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class DictConfPO {
|
||||
public class DictConfDO {
|
||||
|
||||
private Long id;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.domain.dataobject;
|
||||
package com.tencent.supersonic.knowledge.persistence.dataobject;
|
||||
|
||||
import java.util.Date;
|
||||
import lombok.Data;
|
||||
@@ -7,7 +7,7 @@ import org.apache.commons.codec.digest.DigestUtils;
|
||||
|
||||
@Data
|
||||
@ToString
|
||||
public class DimValueDictTaskPO {
|
||||
public class DictTaskDO {
|
||||
|
||||
private Long id;
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
package com.tencent.supersonic.knowledge.persistence.mapper;
|
||||
|
||||
|
||||
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
@Mapper
|
||||
public interface DictConfMapper {
|
||||
|
||||
Boolean createDictConf(DictConfDO dictConfDO);
|
||||
|
||||
Boolean editDictConf(DictConfDO dictConfDO);
|
||||
|
||||
Boolean upsertDictInfo(DictConfDO dictConfDO);
|
||||
|
||||
DictConfDO getDictInfoByDomainId(Long domainId);
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.tencent.supersonic.knowledge.persistence.mapper;
|
||||
|
||||
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
|
||||
import java.util.List;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
@Mapper
|
||||
public interface DictTaskMapper {
|
||||
|
||||
Long createDimValueTask(DictTaskDO dictTaskDO);
|
||||
|
||||
Boolean updateTaskStatus(DictTaskDO dictTaskDO);
|
||||
|
||||
List<DictTaskDO> searchDictTaskList(DictTaskFilter filter);
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.tencent.supersonic.knowledge.persistence.repository;
|
||||
|
||||
|
||||
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DimValueDictInfo;
|
||||
import java.util.List;
|
||||
|
||||
public interface DictRepository {
|
||||
|
||||
Long createDimValueDictTask(DictTaskDO dictTaskDO);
|
||||
|
||||
Boolean updateDictTaskStatus(Integer status, DictTaskDO dictTaskDO);
|
||||
|
||||
List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter);
|
||||
|
||||
DictConfig getDictInfoByDomainId(Long domainId);
|
||||
}
|
||||
@@ -0,0 +1,77 @@
|
||||
package com.tencent.supersonic.knowledge.persistence.repository;
|
||||
|
||||
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
|
||||
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
|
||||
import com.tencent.supersonic.knowledge.utils.DictTaskConverter;
|
||||
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DimValueDictInfo;
|
||||
import com.tencent.supersonic.knowledge.persistence.mapper.DictConfMapper;
|
||||
import com.tencent.supersonic.knowledge.persistence.mapper.DictTaskMapper;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.stereotype.Repository;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
|
||||
@Repository
|
||||
public class DictRepositoryImpl implements DictRepository {
|
||||
|
||||
private final DictTaskMapper dictTaskMapper;
|
||||
private final DictConfMapper dictConfMapper;
|
||||
|
||||
public DictRepositoryImpl(DictTaskMapper dictTaskMapper,
|
||||
DictConfMapper dictConfMapper) {
|
||||
this.dictTaskMapper = dictTaskMapper;
|
||||
this.dictConfMapper = dictConfMapper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long createDimValueDictTask(DictTaskDO dictTaskDO) {
|
||||
dictTaskMapper.createDimValueTask(dictTaskDO);
|
||||
return dictTaskDO.getId();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Boolean updateDictTaskStatus(Integer status, DictTaskDO dictTaskDO) {
|
||||
dictTaskDO.setStatus(status);
|
||||
Date createdAt = dictTaskDO.getCreatedAt();
|
||||
long elapsedMs = System.currentTimeMillis() - createdAt.getTime();
|
||||
dictTaskDO.setElapsedMs(elapsedMs);
|
||||
CompletableFuture.supplyAsync(() -> {
|
||||
dictTaskMapper.updateTaskStatus(dictTaskDO);
|
||||
return null;
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter) {
|
||||
List<DimValueDictInfo> dimValueDictDescList = new ArrayList<>();
|
||||
List<DictTaskDO> dictTaskDOList = dictTaskMapper.searchDictTaskList(filter);
|
||||
if (!CollectionUtils.isEmpty(dictTaskDOList)) {
|
||||
dictTaskDOList.stream().forEach(dictTaskPO -> {
|
||||
DimValueDictInfo dimValueDictDesc = new DimValueDictInfo();
|
||||
BeanUtils.copyProperties(dictTaskPO, dimValueDictDesc);
|
||||
dimValueDictDesc.setStatus(TaskStatusEnum.of(dictTaskPO.getStatus()));
|
||||
dimValueDictDescList.add(dimValueDictDesc);
|
||||
});
|
||||
}
|
||||
return dimValueDictDescList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictConfig getDictInfoByDomainId(Long domainId) {
|
||||
DictConfDO dictConfDO = dictConfMapper.getDictInfoByDomainId(domainId);
|
||||
if (Objects.isNull(dictConfDO)) {
|
||||
return null;
|
||||
}
|
||||
return DictTaskConverter.dictConfPO2Config(dictConfDO);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
package com.tencent.supersonic.knowledge.semantic;
|
||||
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticLayer;
|
||||
import com.tencent.supersonic.chat.api.pojo.DomainSchema;
|
||||
import com.tencent.supersonic.common.pojo.Aggregator;
|
||||
import com.tencent.supersonic.common.pojo.Order;
|
||||
import com.tencent.supersonic.common.pojo.ResultData;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Slf4j
|
||||
public abstract class BaseSemanticLayer implements SemanticLayer {
|
||||
|
||||
protected final Cache<String, List<DomainSchemaResp>> domainSchemaCache =
|
||||
CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.SECONDS).build();
|
||||
|
||||
protected ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>> structTypeRef =
|
||||
new ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>>() {
|
||||
};
|
||||
|
||||
@SneakyThrows
|
||||
public List<DomainSchemaResp> fetchDomainSchema(List<Long> ids, Boolean cacheEnable) {
|
||||
if (cacheEnable) {
|
||||
return domainSchemaCache.get(String.valueOf(ids), () -> {
|
||||
List<DomainSchemaResp> data = doFetchDomainSchema(ids);
|
||||
return data;
|
||||
});
|
||||
}
|
||||
List<DomainSchemaResp> data = doFetchDomainSchema(ids);
|
||||
return data;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DomainSchema getDomainSchema(Long domain, Boolean cacheEnable) {
|
||||
List<Long> ids = new ArrayList<>();
|
||||
ids.add(domain);
|
||||
List<DomainSchemaResp> domainSchemaResps = fetchDomainSchema(ids, cacheEnable);
|
||||
if (!CollectionUtils.isEmpty(domainSchemaResps)) {
|
||||
Optional<DomainSchemaResp> domainSchemaResp = domainSchemaResps.stream()
|
||||
.filter(d -> d.getId().equals(domain)).findFirst();
|
||||
if (domainSchemaResp.isPresent()) {
|
||||
DomainSchemaResp domainSchema = domainSchemaResp.get();
|
||||
return DomainSchemaBuilder.build(domainSchema);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainSchema> getDomainSchema() {
|
||||
return getDomainSchema(new ArrayList<>());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainSchema> getDomainSchema(List<Long> ids) {
|
||||
List<DomainSchema> domainSchemaList = new ArrayList<>();
|
||||
|
||||
for(DomainSchemaResp resp : fetchDomainSchema(ids, true)) {
|
||||
domainSchemaList.add(DomainSchemaBuilder.build(resp));
|
||||
}
|
||||
|
||||
return domainSchemaList;
|
||||
}
|
||||
|
||||
protected void deletionDuplicated(QueryStructReq queryStructReq) {
|
||||
if (!CollectionUtils.isEmpty(queryStructReq.getGroups()) && queryStructReq.getGroups().size() > 1) {
|
||||
Set<String> groups = new HashSet<>();
|
||||
groups.addAll(queryStructReq.getGroups());
|
||||
queryStructReq.getGroups().clear();
|
||||
queryStructReq.getGroups().addAll(groups);
|
||||
}
|
||||
}
|
||||
|
||||
protected void onlyQueryFirstMetric(QueryStructReq queryStructReq) {
|
||||
if (!CollectionUtils.isEmpty(queryStructReq.getAggregators()) && queryStructReq.getAggregators().size() > 1) {
|
||||
log.info("multi metric in aggregators:{} , only query first one", queryStructReq.getAggregators());
|
||||
List<Aggregator> aggregators = queryStructReq.getAggregators().subList(0, 1);
|
||||
List<String> excludeAggregators = queryStructReq.getAggregators().stream().map(a -> a.getColumn())
|
||||
.filter(a -> !a.equals(aggregators.get(0).getColumn())).collect(
|
||||
Collectors.toList());
|
||||
queryStructReq.setAggregators(aggregators);
|
||||
List<Order> orders = queryStructReq.getOrders().stream()
|
||||
.filter(o -> !excludeAggregators.contains(o.getColumn())).collect(
|
||||
Collectors.toList());
|
||||
log.info("multi metric in orders:{} ", queryStructReq.getOrders());
|
||||
queryStructReq.setOrders(orders);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract List<DomainSchemaResp> doFetchDomainSchema(List<Long> ids);
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
package com.tencent.supersonic.knowledge.semantic;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
@Configuration
|
||||
@Data
|
||||
public class DefaultSemanticConfig {
|
||||
|
||||
@Value("${semantic.url.prefix:http://localhost:8081}")
|
||||
private String semanticUrl;
|
||||
|
||||
@Value("${searchByStruct.path:/api/semantic/query/struct}")
|
||||
private String searchByStructPath;
|
||||
|
||||
@Value("${searchByStruct.path:/api/semantic/query/multiStruct}")
|
||||
private String searchByMultiStructPath;
|
||||
|
||||
@Value("${searchByStruct.path:/api/semantic/query/sql}")
|
||||
private String searchBySqlPath;
|
||||
|
||||
@Value("${fetchDomainSchemaPath.path:/api/semantic/schema}")
|
||||
private String fetchDomainSchemaPath;
|
||||
|
||||
@Value("${fetchDomainList.path:/api/semantic/schema/dimension/page}")
|
||||
private String fetchDimensionPagePath;
|
||||
|
||||
@Value("${fetchDomainList.path:/api/semantic/schema/metric/page}")
|
||||
private String fetchMetricPagePath;
|
||||
|
||||
@Value("${fetchDomainList.path:/api/semantic/schema/domain/list}")
|
||||
private String fetchDomainListPath;
|
||||
|
||||
@Value("${fetchDomainList.path:/api/semantic/schema/domain/view/list}")
|
||||
private String fetchDomainViewListPath;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,119 @@
|
||||
package com.tencent.supersonic.knowledge.semantic;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.DomainSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.semantic.api.model.pojo.DimValueMap;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class DomainSchemaBuilder {
|
||||
|
||||
public static DomainSchema build(DomainSchemaResp resp) {
|
||||
DomainSchema domainSchema = new DomainSchema();
|
||||
|
||||
SchemaElement domain = SchemaElement.builder()
|
||||
.domain(resp.getId())
|
||||
.id(resp.getId())
|
||||
.name(resp.getName())
|
||||
.bizName(resp.getBizName())
|
||||
.type(SchemaElementType.DOMAIN)
|
||||
.build();
|
||||
domainSchema.setDomain(domain);
|
||||
|
||||
Set<SchemaElement> metrics = new HashSet<>();
|
||||
for (MetricSchemaResp metric : resp.getMetrics()) {
|
||||
SchemaElement metricToAdd = SchemaElement.builder()
|
||||
.domain(resp.getId())
|
||||
.id(metric.getId())
|
||||
.name(metric.getName())
|
||||
.bizName(metric.getBizName())
|
||||
.type(SchemaElementType.METRIC)
|
||||
.useCnt(metric.getUseCnt())
|
||||
.build();
|
||||
metrics.add(metricToAdd);
|
||||
|
||||
String alias = metric.getAlias();
|
||||
if (StringUtils.isNotEmpty(alias)) {
|
||||
SchemaElement alisMetricToAdd = new SchemaElement();
|
||||
BeanUtils.copyProperties(metricToAdd, alisMetricToAdd);
|
||||
alisMetricToAdd.setName(alias);
|
||||
metrics.add(alisMetricToAdd);
|
||||
}
|
||||
}
|
||||
domainSchema.getMetrics().addAll(metrics);
|
||||
|
||||
Set<SchemaElement> dimensions = new HashSet<>();
|
||||
Set<SchemaElement> dimensionValues = new HashSet<>();
|
||||
for (DimSchemaResp dim : resp.getDimensions()) {
|
||||
|
||||
Set<String> dimValueAlias = new HashSet<>();
|
||||
if (!CollectionUtils.isEmpty(dim.getDimValueMaps())) {
|
||||
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
|
||||
for (DimValueMap dimValueMap : dimValueMaps) {
|
||||
if (Strings.isNotEmpty(dimValueMap.getBizName())) {
|
||||
dimValueAlias.add(dimValueMap.getBizName());
|
||||
}
|
||||
if (!CollectionUtils.isEmpty(dimValueMap.getAlias())) {
|
||||
dimValueAlias.addAll(dimValueMap.getAlias());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SchemaElement dimToAdd = SchemaElement.builder()
|
||||
.domain(resp.getId())
|
||||
.id(dim.getId())
|
||||
.name(dim.getName())
|
||||
.bizName(dim.getBizName())
|
||||
.type(SchemaElementType.DIMENSION)
|
||||
.useCnt(dim.getUseCnt())
|
||||
.build();
|
||||
dimensions.add(dimToAdd);
|
||||
|
||||
String alias = dim.getAlias();
|
||||
if (StringUtils.isNotEmpty(alias)) {
|
||||
SchemaElement alisDimToAdd = new SchemaElement();
|
||||
BeanUtils.copyProperties(dimToAdd, alisDimToAdd);
|
||||
alisDimToAdd.setName(alias);
|
||||
dimensions.add(alisDimToAdd);
|
||||
}
|
||||
|
||||
|
||||
SchemaElement dimValueToAdd = SchemaElement.builder()
|
||||
.domain(resp.getId())
|
||||
.id(dim.getId())
|
||||
.name(dim.getName())
|
||||
.bizName(dim.getBizName())
|
||||
.type(SchemaElementType.VALUE)
|
||||
.useCnt(dim.getUseCnt())
|
||||
.alias(new ArrayList<>(Arrays.asList(dimValueAlias.toArray(new String[0]))))
|
||||
.build();
|
||||
dimensionValues.add(dimValueToAdd);
|
||||
}
|
||||
domainSchema.getDimensions().addAll(dimensions);
|
||||
domainSchema.getDimensionValues().addAll(dimensionValues);
|
||||
|
||||
if (!CollectionUtils.isEmpty(resp.getEntityNames())) {
|
||||
Set<SchemaElement> entities = new HashSet<>();
|
||||
for (String entity : resp.getEntityNames()) {
|
||||
entities.add(SchemaElement.builder()
|
||||
.domain(resp.getId())
|
||||
.id(resp.getId())
|
||||
.name(entity)
|
||||
.bizName(entity)
|
||||
.type(SchemaElementType.ENTITY)
|
||||
.build());
|
||||
}
|
||||
domainSchema.getEntities().addAll(entities);
|
||||
}
|
||||
|
||||
return domainSchema;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,116 @@
|
||||
package com.tencent.supersonic.knowledge.semantic;
|
||||
|
||||
import com.github.pagehelper.PageInfo;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.common.util.S2ThreadContext;
|
||||
import com.tencent.supersonic.common.util.ThreadContext;
|
||||
import com.tencent.supersonic.semantic.api.model.request.DomainSchemaFilterReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
|
||||
import com.tencent.supersonic.semantic.model.domain.DimensionService;
|
||||
import com.tencent.supersonic.semantic.model.domain.DomainService;
|
||||
import com.tencent.supersonic.semantic.model.domain.MetricService;
|
||||
import com.tencent.supersonic.semantic.query.service.QueryService;
|
||||
import com.tencent.supersonic.semantic.query.service.SchemaService;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class LocalSemanticLayer extends BaseSemanticLayer {
|
||||
|
||||
private SchemaService schemaService;
|
||||
private S2ThreadContext s2ThreadContext;
|
||||
private DomainService domainService;
|
||||
private DimensionService dimensionService;
|
||||
private MetricService metricService;
|
||||
|
||||
@Override
|
||||
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user) {
|
||||
deletionDuplicated(queryStructReq);
|
||||
onlyQueryFirstMetric(queryStructReq);
|
||||
try {
|
||||
QueryService queryService = ContextUtils.getBean(QueryService.class);
|
||||
QueryResultWithSchemaResp queryResultWithSchemaResp = queryService.queryByStruct(queryStructReq, user);
|
||||
return queryResultWithSchemaResp;
|
||||
} catch (Exception e) {
|
||||
log.info("queryByStruct has an exception:{}", e.toString());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryResultWithSchemaResp queryByMultiStruct(QueryMultiStructReq queryMultiStructReq, User user) {
|
||||
for (QueryStructReq queryStructReq : queryMultiStructReq.getQueryStructReqs()) {
|
||||
deletionDuplicated(queryStructReq);
|
||||
onlyQueryFirstMetric(queryStructReq);
|
||||
}
|
||||
try {
|
||||
QueryService queryService = ContextUtils.getBean(QueryService.class);
|
||||
return queryService.queryByMultiStruct(queryMultiStructReq, user);
|
||||
} catch (Exception e) {
|
||||
log.info("queryByMultiStruct has an exception:{}", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryResultWithSchemaResp queryByDsl(QueryDslReq queryDslReq, User user) {
|
||||
try {
|
||||
QueryService queryService = ContextUtils.getBean(QueryService.class);
|
||||
Object object = queryService.queryBySql(queryDslReq, user);
|
||||
QueryResultWithSchemaResp queryResultWithSchemaResp = JsonUtil.toObject(JsonUtil.toString(object),
|
||||
QueryResultWithSchemaResp.class);
|
||||
return queryResultWithSchemaResp;
|
||||
} catch (Exception e) {
|
||||
log.info("queryByDsl has an exception:{}", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainSchemaResp> doFetchDomainSchema(List<Long> ids) {
|
||||
DomainSchemaFilterReq filter = new DomainSchemaFilterReq();
|
||||
filter.setDomainIds(ids);
|
||||
User user = new User(1L, "admin", "admin", "admin@email");
|
||||
schemaService = ContextUtils.getBean(SchemaService.class);
|
||||
return schemaService.fetchDomainSchema(filter, user);
|
||||
}
|
||||
@Override
|
||||
public List<DomainResp> getDomainListForViewer() {
|
||||
s2ThreadContext = ContextUtils.getBean(S2ThreadContext.class);
|
||||
ThreadContext threadContext = s2ThreadContext.get();
|
||||
domainService = ContextUtils.getBean(DomainService.class);
|
||||
return domainService.getDomainListForViewer(threadContext.getUserName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainResp> getDomainListForAdmin() {
|
||||
domainService = ContextUtils.getBean(DomainService.class);
|
||||
s2ThreadContext = ContextUtils.getBean(S2ThreadContext.class);
|
||||
ThreadContext threadContext = s2ThreadContext.get();
|
||||
return domainService.getDomainListForAdmin(threadContext.getUserName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public PageInfo<DimensionResp> getDimensionPage(PageDimensionReq pageDimensionCmd) {
|
||||
dimensionService = ContextUtils.getBean(DimensionService.class);
|
||||
return dimensionService.queryDimension(pageDimensionCmd);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PageInfo<MetricResp> getMetricPage(PageMetricReq pageMetricCmd) {
|
||||
metricService = ContextUtils.getBean(MetricService.class);
|
||||
return metricService.queryMetric(pageMetricCmd);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,245 @@
|
||||
package com.tencent.supersonic.knowledge.semantic;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.github.pagehelper.PageInfo;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.gson.Gson;
|
||||
import com.tencent.supersonic.auth.api.authentication.config.AuthenticationConfig;
|
||||
import com.tencent.supersonic.auth.api.authentication.constant.UserConstants;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.S2ThreadContext;
|
||||
import com.tencent.supersonic.common.util.ThreadContext;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.semantic.api.model.request.DomainSchemaFilterReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
|
||||
import com.tencent.supersonic.semantic.api.model.response.*;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
|
||||
import com.tencent.supersonic.common.pojo.exception.CommonException;
|
||||
import com.tencent.supersonic.common.pojo.ResultData;
|
||||
import com.tencent.supersonic.common.pojo.ReturnCode;
|
||||
|
||||
import java.net.URI;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.util.UriComponentsBuilder;
|
||||
|
||||
import static com.tencent.supersonic.common.pojo.Constants.*;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.PAGESIZE_LOWER;
|
||||
|
||||
@Slf4j
|
||||
public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
@Autowired
|
||||
private S2ThreadContext s2ThreadContext;
|
||||
@Autowired
|
||||
private AuthenticationConfig authenticationConfig;
|
||||
|
||||
private static final Cache<String, List<DomainSchemaResp>> domainSchemaCache =
|
||||
CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.SECONDS).build();
|
||||
private ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>> structTypeRef =
|
||||
new ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>>() {
|
||||
};
|
||||
|
||||
@Override
|
||||
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user) {
|
||||
deletionDuplicated(queryStructReq);
|
||||
onlyQueryFirstMetric(queryStructReq);
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
return searchByRestTemplate(
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getSearchByStructPath(),
|
||||
new Gson().toJson(queryStructReq));
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryResultWithSchemaResp queryByMultiStruct(QueryMultiStructReq queryMultiStructReq, User user) {
|
||||
for (QueryStructReq queryStructReq : queryMultiStructReq.getQueryStructReqs()) {
|
||||
deletionDuplicated(queryStructReq);
|
||||
onlyQueryFirstMetric(queryStructReq);
|
||||
}
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
return searchByRestTemplate(
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getSearchByMultiStructPath(),
|
||||
new Gson().toJson(queryMultiStructReq));
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueryResultWithSchemaResp queryByDsl(QueryDslReq queryDslReq, User user) {
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
return searchByRestTemplate(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getSearchBySqlPath(),
|
||||
new Gson().toJson(queryDslReq));
|
||||
}
|
||||
|
||||
public QueryResultWithSchemaResp searchByRestTemplate(String url, String jsonReq) {
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
fillToken(headers);
|
||||
URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri();
|
||||
HttpEntity<String> entity = new HttpEntity<>(jsonReq, headers);
|
||||
log.info("url:{},searchByRestTemplate:{}", url, entity.getBody());
|
||||
ResultData<QueryResultWithSchemaResp> responseBody;
|
||||
try {
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
|
||||
ResponseEntity<ResultData<QueryResultWithSchemaResp>> responseEntity = restTemplate.exchange(
|
||||
requestUrl, HttpMethod.POST, entity, structTypeRef);
|
||||
responseBody = responseEntity.getBody();
|
||||
log.info("ApiResponse<QueryResultWithColumns> responseBody:{}", responseBody);
|
||||
QueryResultWithSchemaResp semanticQuery = new QueryResultWithSchemaResp();
|
||||
if (ReturnCode.SUCCESS.getCode() == responseBody.getCode()) {
|
||||
QueryResultWithSchemaResp data = responseBody.getData();
|
||||
semanticQuery.setColumns(data.getColumns());
|
||||
semanticQuery.setResultList(data.getResultList());
|
||||
semanticQuery.setSql(data.getSql());
|
||||
semanticQuery.setQueryAuthorization(data.getQueryAuthorization());
|
||||
return semanticQuery;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("search semantic interface error,url:" + url, e);
|
||||
}
|
||||
throw new CommonException(responseBody.getCode(), responseBody.getMsg());
|
||||
}
|
||||
|
||||
public List<DomainSchemaResp> doFetchDomainSchema(List<Long> ids) {
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.set(UserConstants.INTERNAL, TRUE_LOWER);
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
fillToken(headers);
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
|
||||
URI requestUrl = UriComponentsBuilder.fromHttpUrl(
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainSchemaPath()).build()
|
||||
.encode().toUri();
|
||||
DomainSchemaFilterReq filter = new DomainSchemaFilterReq();
|
||||
filter.setDomainIds(ids);
|
||||
ParameterizedTypeReference<ResultData<List<DomainSchemaResp>>> responseTypeRef =
|
||||
new ParameterizedTypeReference<ResultData<List<DomainSchemaResp>>>() {
|
||||
};
|
||||
|
||||
HttpEntity<String> entity = new HttpEntity<>(JSON.toJSONString(filter), headers);
|
||||
|
||||
try {
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
ResponseEntity<ResultData<List<DomainSchemaResp>>> responseEntity = restTemplate.exchange(
|
||||
requestUrl, HttpMethod.POST, entity, responseTypeRef);
|
||||
ResultData<List<DomainSchemaResp>> responseBody = responseEntity.getBody();
|
||||
log.debug("ApiResponse<fetchDomainSchema> responseBody:{}", responseBody);
|
||||
if (ReturnCode.SUCCESS.getCode() == responseBody.getCode()) {
|
||||
List<DomainSchemaResp> data = responseBody.getData();
|
||||
return data;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("fetchDomainSchema interface error", e);
|
||||
}
|
||||
throw new RuntimeException("fetchDomainSchema interface error");
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainResp> getDomainListForViewer() {
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
Object domainDescListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainViewListPath(), null, HttpMethod.GET);
|
||||
List<DomainResp> domainDescList = JsonUtil.toList(JsonUtil.toString(domainDescListObject), DomainResp.class);
|
||||
return domainDescList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainResp> getDomainListForAdmin() {
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
Object domainDescListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainListPath(), null, HttpMethod.GET);
|
||||
List<DomainResp> domainDescList = JsonUtil.toList(JsonUtil.toString(domainDescListObject), DomainResp.class);
|
||||
return domainDescList;
|
||||
}
|
||||
|
||||
public Object fetchHttpResult(String url, String bodyJson, HttpMethod httpMethod) {
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
fillToken(headers);
|
||||
URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri();
|
||||
ParameterizedTypeReference<ResultData<Object>> responseTypeRef =
|
||||
new ParameterizedTypeReference<ResultData<Object>>() {
|
||||
};
|
||||
HttpEntity<String> entity = new HttpEntity<>(JsonUtil.toString(bodyJson), headers);
|
||||
try {
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
ResponseEntity<ResultData<Object>> responseEntity = restTemplate.exchange(requestUrl,
|
||||
httpMethod, entity, responseTypeRef);
|
||||
ResultData<Object> responseBody = responseEntity.getBody();
|
||||
log.debug("ApiResponse<fetchDomainSchema> responseBody:{}", responseBody);
|
||||
if (ReturnCode.SUCCESS.getCode() == responseBody.getCode()) {
|
||||
Object data = responseBody.getData();
|
||||
return data;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("fetchDomainSchema interface error", e);
|
||||
}
|
||||
throw new RuntimeException("fetchDomainSchema interface error");
|
||||
}
|
||||
|
||||
public void fillToken(HttpHeaders headers) {
|
||||
s2ThreadContext = ContextUtils.getBean(S2ThreadContext.class);
|
||||
authenticationConfig = ContextUtils.getBean(AuthenticationConfig.class);
|
||||
ThreadContext threadContext = s2ThreadContext.get();
|
||||
if (Objects.nonNull(threadContext) && Strings.isNotEmpty(threadContext.getToken())) {
|
||||
if (Objects.nonNull(authenticationConfig) && Strings.isNotEmpty(
|
||||
authenticationConfig.getTokenHttpHeaderKey())) {
|
||||
headers.set(authenticationConfig.getTokenHttpHeaderKey(), threadContext.getToken());
|
||||
}
|
||||
} else {
|
||||
log.debug("threadContext is null:{}", Objects.isNull(threadContext));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public PageInfo<MetricResp> getMetricPage(PageMetricReq pageMetricCmd) {
|
||||
String body = JsonUtil.toString(pageMetricCmd);
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
log.info("url:{}", defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath());
|
||||
Object dimensionListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath(), body, HttpMethod.POST);
|
||||
LinkedHashMap map = (LinkedHashMap) dimensionListObject;
|
||||
PageInfo<Object> metricDescObjectPageInfo = generatePageInfo(map);
|
||||
PageInfo<MetricResp> metricDescPageInfo = new PageInfo<>();
|
||||
BeanUtils.copyProperties(metricDescObjectPageInfo, metricDescPageInfo);
|
||||
metricDescPageInfo.setList(metricDescPageInfo.getList());
|
||||
return metricDescPageInfo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PageInfo<DimensionResp> getDimensionPage(PageDimensionReq pageDimensionCmd) {
|
||||
String body = JsonUtil.toString(pageDimensionCmd);
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
Object dimensionListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDimensionPagePath(), body, HttpMethod.POST);
|
||||
LinkedHashMap map = (LinkedHashMap) dimensionListObject;
|
||||
PageInfo<Object> dimensionDescObjectPageInfo = generatePageInfo(map);
|
||||
PageInfo<DimensionResp> dimensionDescPageInfo = new PageInfo<>();
|
||||
BeanUtils.copyProperties(dimensionDescObjectPageInfo, dimensionDescPageInfo);
|
||||
dimensionDescPageInfo.setList(dimensionDescPageInfo.getList());
|
||||
return dimensionDescPageInfo;
|
||||
}
|
||||
|
||||
private PageInfo<Object> generatePageInfo(LinkedHashMap map) {
|
||||
PageInfo<Object> pageInfo = new PageInfo<>();
|
||||
pageInfo.setList((List<Object>) map.get(LIST_LOWER));
|
||||
Integer total = (Integer) map.get(TOTAL_LOWER);
|
||||
pageInfo.setTotal(total);
|
||||
Integer pageSize = (Integer) map.get(PAGESIZE_LOWER);
|
||||
pageInfo.setPageSize(pageSize);
|
||||
pageInfo.setPages((int) Math.ceil((double) total / pageSize));
|
||||
return pageInfo;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.tencent.supersonic.knowledge.service;
|
||||
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public interface KnowledgeService {
|
||||
|
||||
void updateSemanticKnowledge(List<DictWord> natures);
|
||||
|
||||
void reloadAllData(List<DictWord> natures);
|
||||
|
||||
void updateOnlineKnowledge(List<DictWord> natures);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,55 @@
|
||||
package com.tencent.supersonic.knowledge.service;
|
||||
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class KnowledgeServiceImpl implements KnowledgeService {
|
||||
|
||||
public void updateSemanticKnowledge(List<DictWord> natures) {
|
||||
|
||||
List<DictWord> prefixes = natures.stream()
|
||||
.filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
for (DictWord nature : prefixes) {
|
||||
HanlpHelper.addToCustomDictionary(nature);
|
||||
}
|
||||
|
||||
List<DictWord> suffixes = natures.stream()
|
||||
.filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
SearchService.loadSuffix(suffixes);
|
||||
}
|
||||
|
||||
|
||||
public void reloadAllData(List<DictWord> natures) {
|
||||
// 1. reload custom knowledge
|
||||
try {
|
||||
HanlpHelper.reloadCustomDictionary();
|
||||
} catch (Exception e) {
|
||||
log.error("reloadCustomDictionary error", e);
|
||||
}
|
||||
|
||||
// 2. update online knowledge
|
||||
updateOnlineKnowledge(natures);
|
||||
}
|
||||
|
||||
public void updateOnlineKnowledge(List<DictWord> natures) {
|
||||
try {
|
||||
updateSemanticKnowledge(natures);
|
||||
} catch (Exception e) {
|
||||
log.error("updateSemanticKnowledge error", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
package com.tencent.supersonic.knowledge.service;
|
||||
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticLayer;
|
||||
import com.tencent.supersonic.chat.api.pojo.DomainSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.knowledge.utils.ComponentFactory;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class SchemaService {
|
||||
|
||||
private static final Integer META_CACHE_TIME = 5;
|
||||
public static final String ALL_CACHE = "all";
|
||||
|
||||
private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
|
||||
|
||||
private LoadingCache<String, SemanticSchema> cache = CacheBuilder.newBuilder()
|
||||
.expireAfterWrite(META_CACHE_TIME, TimeUnit.MINUTES)
|
||||
.build(
|
||||
new CacheLoader<String, SemanticSchema>() {
|
||||
@Override
|
||||
public SemanticSchema load(String key) {
|
||||
log.info("load getDomainSchemaInfo cache [{}]", key);
|
||||
return new SemanticSchema(semanticLayer.getDomainSchema());
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
public DomainSchema getDomainSchema(Long id) {
|
||||
return semanticLayer.getDomainSchema(id, true);
|
||||
}
|
||||
|
||||
public SemanticSchema getSemanticSchema() {
|
||||
return cache.getUnchecked(ALL_CACHE);
|
||||
}
|
||||
|
||||
public LoadingCache<String, SemanticSchema> getCache() {
|
||||
return cache;
|
||||
}
|
||||
}
|
||||
@@ -1,12 +1,13 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.nlp;
|
||||
package com.tencent.supersonic.knowledge.service;
|
||||
|
||||
import com.hankcs.hanlp.collection.trie.bintrie.BaseNode;
|
||||
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.tencent.supersonic.common.nlp.MapResult;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.MapResult;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -14,17 +15,18 @@ import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictionaryAttributeUtil;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Service
|
||||
public class Suggester {
|
||||
@Slf4j
|
||||
public class SearchService {
|
||||
|
||||
public static final int SEARCH_SIZE = 200;
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(Suggester.class);
|
||||
private static BinTrie<List<String>> trie;
|
||||
private static BinTrie<List<String>> suffixTrie;
|
||||
private static String localFileCache = "";
|
||||
@@ -75,7 +77,7 @@ public class Suggester {
|
||||
entry -> {
|
||||
String name = entry.getKey().replace("#", " ");
|
||||
List<String> natures = entry.getValue().stream()
|
||||
.map(nature -> nature.replaceAll(NatureType.SUFFIX.getType(), ""))
|
||||
.map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), ""))
|
||||
.collect(Collectors.toList());
|
||||
name = StringUtils.reverse(name);
|
||||
return new MapResult(name, natures, key);
|
||||
@@ -107,7 +109,7 @@ public class Suggester {
|
||||
}
|
||||
|
||||
public static void clear() {
|
||||
LOGGER.info("clear all trie");
|
||||
log.info("clear all trie");
|
||||
trie = new BinTrie<>();
|
||||
suffixTrie = new BinTrie<>();
|
||||
}
|
||||
@@ -117,12 +119,12 @@ public class Suggester {
|
||||
}
|
||||
|
||||
|
||||
public static void loadSuffix(List<WordNature> suffixes) {
|
||||
public static void loadSuffix(List<DictWord> suffixes) {
|
||||
if (CollectionUtils.isEmpty(suffixes)) {
|
||||
return;
|
||||
}
|
||||
TreeMap<String, CoreDictionary.Attribute> map = new TreeMap();
|
||||
for (WordNature suffix : suffixes) {
|
||||
for (DictWord suffix : suffixes) {
|
||||
CoreDictionary.Attribute attributeNew = suffix.getNatureWithFrequency() == null
|
||||
? new CoreDictionary.Attribute(Nature.nz, 1)
|
||||
: CoreDictionary.Attribute.create(suffix.getNatureWithFrequency());
|
||||
@@ -0,0 +1,52 @@
|
||||
package com.tencent.supersonic.knowledge.service;
|
||||
|
||||
import com.tencent.supersonic.chat.api.component.SemanticLayer;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import com.tencent.supersonic.knowledge.dictionary.builder.WordBuilderFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import com.tencent.supersonic.knowledge.utils.ComponentFactory;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class WordService {
|
||||
|
||||
private List<DictWord> preDictWords = new ArrayList<>();
|
||||
|
||||
public List<DictWord> getAllDictWords() {
|
||||
SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
|
||||
SemanticSchema semanticSchema = new SemanticSchema(semanticLayer.getDomainSchema());
|
||||
|
||||
List<DictWord> words = new ArrayList<>();
|
||||
|
||||
addWordsByType(DictWordType.DIMENSION, semanticSchema.getDimensions(), words);
|
||||
addWordsByType(DictWordType.METRIC, semanticSchema.getMetrics(), words);
|
||||
addWordsByType(DictWordType.DOMAIN, semanticSchema.getDomains(), words);
|
||||
addWordsByType(DictWordType.ENTITY, semanticSchema.getEntities(), words);
|
||||
addWordsByType(DictWordType.VALUE, semanticSchema.getDimensionValues(), words);
|
||||
|
||||
return words;
|
||||
}
|
||||
|
||||
private void addWordsByType(DictWordType value, List<SchemaElement> metas, List<DictWord> natures) {
|
||||
List<DictWord> natureList = WordBuilderFactory.get(value).getDictWords(metas);
|
||||
log.debug("nature type:{} , nature size:{}", value.name(), natureList.size());
|
||||
natures.addAll(natureList);
|
||||
}
|
||||
|
||||
public List<DictWord> getPreDictWords() {
|
||||
return preDictWords;
|
||||
}
|
||||
|
||||
public void setPreDictWords(List<DictWord> preDictWords) {
|
||||
this.preDictWords = preDictWords;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
package com.tencent.supersonic.knowledge.utils;
|
||||
|
||||
import com.tencent.supersonic.chat.api.component.SemanticLayer;
|
||||
import org.springframework.core.io.support.SpringFactoriesLoader;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
public class ComponentFactory {
|
||||
|
||||
private static SemanticLayer semanticLayer;
|
||||
|
||||
public static SemanticLayer getSemanticLayer() {
|
||||
if (Objects.isNull(semanticLayer)) {
|
||||
semanticLayer = init(SemanticLayer.class);
|
||||
}
|
||||
return semanticLayer;
|
||||
}
|
||||
|
||||
public static void setSemanticLayer(SemanticLayer layer) {
|
||||
semanticLayer = layer;
|
||||
}
|
||||
|
||||
private static <T> List<T> init(Class<T> factoryType, List list) {
|
||||
list.addAll(SpringFactoriesLoader.loadFactories(factoryType,
|
||||
Thread.currentThread().getContextClassLoader()));
|
||||
return list;
|
||||
}
|
||||
|
||||
private static <T> T init(Class<T> factoryType) {
|
||||
return SpringFactoriesLoader.loadFactories(factoryType,
|
||||
Thread.currentThread().getContextClassLoader()).get(0);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,47 @@
|
||||
package com.tencent.supersonic.knowledge.utils;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DimValue2DictCommand;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DimValueInfo;
|
||||
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
|
||||
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
|
||||
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
public class DictTaskConverter {
|
||||
|
||||
private static String dateTimeFormatter = "yyyyMMddHHmmss";
|
||||
|
||||
public static DictTaskDO generateDimValueDictTaskPO(DimValue2DictCommand dimValue2DictCommend, User user) {
|
||||
DictTaskDO taskPO = new DictTaskDO();
|
||||
Date createAt = new Date();
|
||||
String date = DateTimeFormatter.ofPattern(dateTimeFormatter)
|
||||
.format(createAt.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime());
|
||||
String creator = Strings.isNullOrEmpty(user.getName()) ? "" : user.getName();
|
||||
String updateMode = dimValue2DictCommend.getUpdateMode().getValue();
|
||||
String name = String.format("DimValue_dic_%s_%s_%s", updateMode, creator, date);
|
||||
taskPO.setName(name);
|
||||
|
||||
taskPO.setCreatedAt(createAt);
|
||||
taskPO.setCommand(JsonUtil.toString(dimValue2DictCommend));
|
||||
taskPO.setStatus(TaskStatusEnum.RUNNING.getCode());
|
||||
taskPO.setCreatedBy(creator);
|
||||
|
||||
return taskPO;
|
||||
}
|
||||
|
||||
public static DictConfig dictConfPO2Config(DictConfDO dictConfDO) {
|
||||
DictConfig dictConfig = new DictConfig();
|
||||
dictConfig.setDomainId(dictConfDO.getDomainId());
|
||||
List<DimValueInfo> dimValueInfos = JsonUtil.toList(dictConfDO.getDimValueInfos(), DimValueInfo.class);
|
||||
dictConfig.setDimValueInfoList(dimValueInfos);
|
||||
return dictConfig;
|
||||
}
|
||||
}
|
||||
@@ -1,19 +1,19 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.nlp;
|
||||
package com.tencent.supersonic.knowledge.utils;
|
||||
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
import static com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper.FILE_SPILT;
|
||||
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class FileHelper {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(FileHelper.class);
|
||||
public static final String FILE_SPILT = "/";
|
||||
|
||||
public static void deleteCacheFile(String[] path) throws IOException {
|
||||
|
||||
@@ -25,9 +25,9 @@ public class FileHelper {
|
||||
for (File file : customSubFiles) {
|
||||
try {
|
||||
file.delete();
|
||||
LOGGER.info("customPath:{},delete cache file:{}", customPath, file);
|
||||
log.info("customPath:{},delete cache file:{}", customPath, file);
|
||||
} catch (Exception e) {
|
||||
LOGGER.error("delete " + file, e);
|
||||
log.error("delete " + file, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -70,7 +70,7 @@ public class FileHelper {
|
||||
}
|
||||
}
|
||||
|
||||
LOGGER.info("CustomDictionaryPath:{}", fileList);
|
||||
log.info("CustomDictionaryPath:{}", fileList);
|
||||
CustomDictionaryPath = fileList.toArray(new String[0]);
|
||||
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
|
||||
: CustomDictionaryPath;
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.nlp;
|
||||
package com.tencent.supersonic.knowledge.utils;
|
||||
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
@@ -7,30 +7,33 @@ import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.seg.Segment;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.common.nlp.MapResult;
|
||||
import com.tencent.supersonic.common.nlp.NatureType;
|
||||
import com.tencent.supersonic.common.nlp.WordNature;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWord;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.tencent.supersonic.knowledge.dictionary.MapResult;
|
||||
import com.tencent.supersonic.knowledge.dictionary.HadoopFileIOAdapter;
|
||||
import com.tencent.supersonic.knowledge.service.SearchService;
|
||||
import com.tencent.supersonic.knowledge.dictionary.MultiCustomDictionary;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.util.ResourceUtils;
|
||||
|
||||
/**
|
||||
* HanLP helper
|
||||
*/
|
||||
@Slf4j
|
||||
public class HanlpHelper {
|
||||
|
||||
public static final String FILE_SPILT = "/";
|
||||
public static final String SPACE_SPILT = "#";
|
||||
public static final String DICT_MAIN_FILE_NAME = "CustomDictionary.txt";
|
||||
public static final String DICT_CLASS = "classes";
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(HanlpHelper.class);
|
||||
public static volatile DynamicCustomDictionary CustomDictionary;
|
||||
private static volatile DynamicCustomDictionary CustomDictionary;
|
||||
private static volatile Segment segment;
|
||||
|
||||
static {
|
||||
@@ -38,7 +41,7 @@ public class HanlpHelper {
|
||||
try {
|
||||
resetHanlpConfig();
|
||||
} catch (FileNotFoundException e) {
|
||||
LOGGER.error("resetHanlpConfig error", e);
|
||||
log.error("resetHanlpConfig error", e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,7 +79,7 @@ public class HanlpHelper {
|
||||
*/
|
||||
public static boolean reloadCustomDictionary() throws IOException {
|
||||
|
||||
LOGGER.info("reloadCustomDictionary start");
|
||||
log.info("reloadCustomDictionary start");
|
||||
|
||||
final long startTime = System.currentTimeMillis();
|
||||
|
||||
@@ -93,10 +96,10 @@ public class HanlpHelper {
|
||||
FileHelper.resetCustomPath(getDynamicCustomDictionary());
|
||||
}
|
||||
// 3.clear trie
|
||||
Suggester.clear();
|
||||
SearchService.clear();
|
||||
|
||||
boolean reload = getDynamicCustomDictionary().reload();
|
||||
LOGGER.info("reloadCustomDictionary end ,cost:{},reload:{}", System.currentTimeMillis() - startTime, reload);
|
||||
log.info("reloadCustomDictionary end ,cost:{},reload:{}", System.currentTimeMillis() - startTime, reload);
|
||||
return reload;
|
||||
}
|
||||
|
||||
@@ -108,7 +111,7 @@ public class HanlpHelper {
|
||||
|
||||
CustomDictionaryPath = Arrays.stream(CustomDictionaryPath).map(path -> hanlpPropertiesPath + FILE_SPILT + path)
|
||||
.toArray(String[]::new);
|
||||
LOGGER.info("hanlpPropertiesPath:{},CustomDictionaryPath:{}", hanlpPropertiesPath, CustomDictionaryPath);
|
||||
log.info("hanlpPropertiesPath:{},CustomDictionaryPath:{}", hanlpPropertiesPath, CustomDictionaryPath);
|
||||
|
||||
HanLP.Config.CoreDictionaryPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.BiGramDictionaryPath;
|
||||
HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath = hanlpPropertiesPath + FILE_SPILT
|
||||
@@ -155,9 +158,9 @@ public class HanlpHelper {
|
||||
return ResourceUtils.getFile("classpath:hanlp.properties").getParent();
|
||||
}
|
||||
|
||||
public static boolean addToCustomDictionary(WordNature wordNature) {
|
||||
LOGGER.info("wordNature:{}", wordNature);
|
||||
return getDynamicCustomDictionary().insert(wordNature.getWord(), wordNature.getNatureWithFrequency());
|
||||
public static boolean addToCustomDictionary(DictWord dictWord) {
|
||||
log.info("dictWord:{}", dictWord);
|
||||
return getDynamicCustomDictionary().insert(dictWord.getWord(), dictWord.getNatureWithFrequency());
|
||||
}
|
||||
|
||||
public static void transLetterOriginal(List<MapResult> mapResults) {
|
||||
@@ -178,7 +181,7 @@ public class HanlpHelper {
|
||||
|
||||
public static List<Term> getTerms(String text) {
|
||||
return getSegment().seg(text.toLowerCase()).stream()
|
||||
.filter(term -> term.getNature().startsWith(NatureType.NATURE_SPILT))
|
||||
.filter(term -> term.getNature().startsWith(DictWordType.NATURE_SPILT))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package com.tencent.supersonic.knowledge.infrastructure.nlp;
|
||||
package com.tencent.supersonic.knowledge.utils;
|
||||
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
import static com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper.FILE_SPILT;
|
||||
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.utility.Predefine;
|
||||
@@ -9,20 +8,19 @@ import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Hdfs File Helper
|
||||
*/
|
||||
@Slf4j
|
||||
public class HdfsFileHelper {
|
||||
|
||||
private static final Logger LOGGER = LoggerFactory.getLogger(HdfsFileHelper.class);
|
||||
|
||||
/***
|
||||
* delete cache file
|
||||
* @param path
|
||||
@@ -31,24 +29,24 @@ public class HdfsFileHelper {
|
||||
public static void deleteCacheFile(String[] path) throws IOException {
|
||||
FileSystem fs = FileSystem.get(URI.create(path[0]), new Configuration());
|
||||
String cacheFilePath = path[0] + Predefine.BIN_EXT;
|
||||
LOGGER.info("delete cache file:{}", cacheFilePath);
|
||||
log.info("delete cache file:{}", cacheFilePath);
|
||||
try {
|
||||
fs.delete(new Path(cacheFilePath), false);
|
||||
} catch (Exception e) {
|
||||
LOGGER.error("delete:" + cacheFilePath, e);
|
||||
log.error("delete:" + cacheFilePath, e);
|
||||
}
|
||||
int customBase = cacheFilePath.lastIndexOf(FILE_SPILT);
|
||||
String customPath = cacheFilePath.substring(0, customBase) + FILE_SPILT + "*.bin";
|
||||
int customBase = cacheFilePath.lastIndexOf(FileHelper.FILE_SPILT);
|
||||
String customPath = cacheFilePath.substring(0, customBase) + FileHelper.FILE_SPILT + "*.bin";
|
||||
List<String> fileList = getFileList(fs, new Path(customPath));
|
||||
for (String file : fileList) {
|
||||
try {
|
||||
fs.delete(new Path(file), false);
|
||||
LOGGER.info("delete cache file:{}", file);
|
||||
log.info("delete cache file:{}", file);
|
||||
} catch (Exception e) {
|
||||
LOGGER.error("delete " + file, e);
|
||||
log.error("delete " + file, e);
|
||||
}
|
||||
}
|
||||
LOGGER.info("fileList:{}", fileList);
|
||||
log.info("fileList:{}", fileList);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -61,11 +59,11 @@ public class HdfsFileHelper {
|
||||
String[] path = CustomDictionaryPath;
|
||||
FileSystem fs = FileSystem.get(URI.create(path[0]), new Configuration());
|
||||
String cacheFilePath = path[0] + Predefine.BIN_EXT;
|
||||
int customBase = cacheFilePath.lastIndexOf(FILE_SPILT);
|
||||
String customPath = cacheFilePath.substring(0, customBase) + FILE_SPILT + "*.txt";
|
||||
LOGGER.info("customPath:{}", customPath);
|
||||
int customBase = cacheFilePath.lastIndexOf(FileHelper.FILE_SPILT);
|
||||
String customPath = cacheFilePath.substring(0, customBase) + FileHelper.FILE_SPILT + "*.txt";
|
||||
log.info("customPath:{}", customPath);
|
||||
List<String> fileList = getFileList(fs, new Path(customPath));
|
||||
LOGGER.info("CustomDictionaryPath:{}", fileList);
|
||||
log.info("CustomDictionaryPath:{}", fileList);
|
||||
CustomDictionaryPath = fileList.toArray(new String[0]);
|
||||
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
|
||||
: CustomDictionaryPath;
|
||||
@@ -1,2 +1,2 @@
|
||||
com.tencent.supersonic.knowledge.domain.FileHandler=\
|
||||
com.tencent.supersonic.knowledge.domain.LocalFileHandler
|
||||
com.tencent.supersonic.knowledge.dictionary.FileHandler=\
|
||||
com.tencent.supersonic.knowledge.dictionary.LocalFileHandler
|
||||
@@ -2,10 +2,10 @@
|
||||
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
|
||||
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
||||
|
||||
<mapper namespace="com.tencent.supersonic.knowledge.infrastructure.custom.DictConfMapper">
|
||||
<mapper namespace="com.tencent.supersonic.knowledge.persistence.mapper.DictConfMapper">
|
||||
|
||||
<resultMap id="DictConfPO"
|
||||
type="com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO">
|
||||
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO">
|
||||
<id column="id" property="id"/>
|
||||
<result column="domain_id" property="domainId"/>
|
||||
<result column="dim_value_infos" property="dimValueInfos"/>
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
|
||||
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
||||
|
||||
<mapper namespace="com.tencent.supersonic.knowledge.infrastructure.custom.DictTaskMapper">
|
||||
<mapper namespace="com.tencent.supersonic.knowledge.persistence.mapper.DictTaskMapper">
|
||||
|
||||
<resultMap id="DimValueDictTaskPO"
|
||||
type="com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO">
|
||||
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO">
|
||||
<id column="id" property="id"/>
|
||||
<result column="name" property="name"/>
|
||||
<result column="description" property="description"/>
|
||||
|
||||
Reference in New Issue
Block a user