[improvement][project] supersonic 0.7.0 version backend update (#20)

Co-authored-by: kanedai <kanedai@tencent.com>
Authored by daikon on 2023-07-31 11:09:58 +08:00; committed by GitHub
parent 078a81038f
commit e2b2d31429
675 changed files with 13089 additions and 13536 deletions


@@ -102,6 +102,16 @@
<artifactId>auth-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.tencent.supersonic</groupId>
<artifactId>chat-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.tencent.supersonic</groupId>
<artifactId>semantic-query</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>


@@ -36,7 +36,7 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
*/
protected V value;
public String prefix = null;
protected String prefix = null;
public BaseNode<V> transition(String path, int begin) {
BaseNode<V> cur = this;


@@ -3,7 +3,7 @@ package com.hankcs.hanlp.seg.common;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import lombok.Data;
import lombok.ToString;


@@ -0,0 +1,69 @@
package com.tencent.supersonic.knowledge;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.service.SchemaService;
import com.tencent.supersonic.knowledge.service.KnowledgeService;
import com.tencent.supersonic.knowledge.service.WordService;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.context.event.ApplicationStartedEvent;
import org.springframework.context.ApplicationListener;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.List;
@Slf4j
@Component
public class ApplicationStartedInit implements ApplicationListener<ApplicationStartedEvent> {
@Autowired
private KnowledgeService knowledgeService;
@Autowired
private WordService wordService;
@Autowired
private SchemaService schemaService;
@Override
public void onApplicationEvent(ApplicationStartedEvent event) {
try {
log.debug("ApplicationStartedInit start");
List<DictWord> dictWords = wordService.getAllDictWords();
wordService.setPreDictWords(dictWords);
knowledgeService.reloadAllData(dictWords);
log.debug("ApplicationStartedInit end");
} catch (Exception e) {
log.error("ApplicationStartedInit error", e);
}
}
/***
* reload knowledge task
*/
@Scheduled(cron = "${reload.knowledge.corn:0 0/1 * * * ?}")
public void reloadKnowledge() {
log.debug("reloadKnowledge start");
try {
List<DictWord> dictWords = wordService.getAllDictWords();
List<DictWord> preDictWords = wordService.getPreDictWords();
if (CollectionUtils.isEqualCollection(dictWords, preDictWords)) {
log.debug("dictWords has not changed, reloadKnowledge end");
return;
}
log.info("dictWords has changed");
wordService.setPreDictWords(dictWords);
knowledgeService.updateOnlineKnowledge(wordService.getAllDictWords());
schemaService.getCache().refresh(SchemaService.ALL_CACHE);
} catch (Exception e) {
log.error("reloadKnowledge error", e);
}
log.debug("reloadKnowledge end");
}
}
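
Note: the reload schedule above is resolved from the reload.knowledge.corn property (the key really is spelled "corn" in the placeholder) and defaults to once per minute. As an illustrative deployment assumption, the interval could be widened in application.properties, for example:

reload.knowledge.corn=0 0/5 * * * ?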


@@ -1,52 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.tencent.supersonic.common.nlp.ItemDO;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
/**
* base word nature
*/
@Slf4j
public abstract class BaseWordNature {
/**
* 获取所有wordNature
*
* @param itemDOS
* @return
*/
public List<WordNature> getWordNatureList(List<ItemDO> itemDOS) {
List<WordNature> wordNatures = new ArrayList<>();
try {
wordNatures = getWordNaturesWithException(itemDOS);
} catch (Exception e) {
log.error("getWordNatureList error,", e);
}
return wordNatures;
}
public List<WordNature> getWordNaturesWithException(List<ItemDO> itemDOS) {
List<WordNature> wordNatures = new ArrayList<>();
for (ItemDO itemDO : itemDOS) {
wordNatures.addAll(getWordNature(itemDO.getName(), itemDO));
}
return wordNatures;
}
public abstract List<WordNature> getWordNature(String word, ItemDO itemDO);
public Integer getElementID(String nature) {
String[] split = nature.split(NatureType.NATURE_SPILT);
if (split.length >= 3) {
return Integer.valueOf(split[2]);
}
return 0;
}
}


@@ -1,49 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.nlp.ItemDO;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
* dimension word nature
*/
@Service
public class DimensionWordNature extends BaseWordNature {
@Value("${nlp.dimension.use.suffix:true}")
private boolean nlpDimensionUseSuffix = true;
@Override
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
List<WordNature> result = Lists.newArrayList();
result.add(getOnwWordNature(word, itemDO, false));
if (nlpDimensionUseSuffix) {
String reverseWord = StringUtils.reverse(word);
if (StringUtils.isNotEmpty(word) && !word.equalsIgnoreCase(reverseWord)) {
result.add(getOnwWordNature(reverseWord, itemDO, true));
}
}
return result;
}
private WordNature getOnwWordNature(String word, ItemDO itemDO, boolean isSuffix) {
WordNature wordNature = new WordNature();
wordNature.setWord(word);
Integer domainId = itemDO.getDomain();
String nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
+ NatureType.DIMENSION.getType();
if (isSuffix) {
nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
+ NatureType.SUFFIX.getType() + NatureType.DIMENSION.getType();
}
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
return wordNature;
}
}


@@ -1,30 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.nlp.ItemDO;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* domain word nature
*/
@Service
@Slf4j
public class DomainWordNature extends BaseWordNature {
@Override
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
List<WordNature> result = Lists.newArrayList();
WordNature wordNature = new WordNature();
wordNature.setWord(word);
Integer domainId = itemDO.getDomain();
String nature = NatureType.NATURE_SPILT + domainId;
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
result.add(wordNature);
return result;
}
}


@@ -1,31 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.nlp.ItemDO;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* dimension value wordNature
*/
@Service
@Slf4j
public class EntityWordNature extends BaseWordNature {
@Override
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
List<WordNature> result = Lists.newArrayList();
WordNature wordNature = new WordNature();
wordNature.setWord(word);
Integer domain = itemDO.getDomain();
String nature = NatureType.NATURE_SPILT + domain + NatureType.NATURE_SPILT + itemDO.getItemId()
+ NatureType.ENTITY.getType();
wordNature.setNatureWithFrequency(String.format("%s 200000", nature));
result.add(wordNature);
return result;
}
}


@@ -1,48 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.nlp.ItemDO;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
* Metric WordNature
*/
@Service
public class MetricWordNature extends BaseWordNature {
@Value("${nlp.metric.use.suffix:true}")
private boolean nlpMetricUseSuffix = true;
@Override
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
List<WordNature> result = Lists.newArrayList();
result.add(getOnwWordNature(word, itemDO, false));
if (nlpMetricUseSuffix) {
String reverseWord = StringUtils.reverse(word);
if (!word.equalsIgnoreCase(reverseWord)) {
result.add(getOnwWordNature(reverseWord, itemDO, true));
}
}
return result;
}
private WordNature getOnwWordNature(String word, ItemDO itemDO, boolean isSuffix) {
WordNature wordNature = new WordNature();
wordNature.setWord(word);
Integer domainId = itemDO.getDomain();
String nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
+ NatureType.METRIC.getType();
if (isSuffix) {
nature = NatureType.NATURE_SPILT + domainId + NatureType.NATURE_SPILT + itemDO.getItemId()
+ NatureType.SUFFIX.getType() + NatureType.METRIC.getType();
}
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
return wordNature;
}
}


@@ -1,60 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import com.tencent.supersonic.knowledge.domain.service.OnlineKnowledgeService;
import com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper;
import com.tencent.supersonic.knowledge.infrastructure.nlp.Suggester;
import java.util.List;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
/**
* online knowledge service impl
*/
@Service
public class OnlineKnowledgeServiceImpl implements OnlineKnowledgeService {
private final Logger logger = LoggerFactory.getLogger(OnlineKnowledgeServiceImpl.class);
public void updateSemanticKnowledge(List<WordNature> natures) {
List<WordNature> prefixes = natures.stream()
.filter(entry -> !entry.getNatureWithFrequency().contains(NatureType.SUFFIX.getType()))
.collect(Collectors.toList());
for (WordNature nature : prefixes) {
HanlpHelper.addToCustomDictionary(nature);
}
List<WordNature> suffixes = natures.stream()
.filter(entry -> entry.getNatureWithFrequency().contains(NatureType.SUFFIX.getType()))
.collect(Collectors.toList());
Suggester.loadSuffix(suffixes);
}
public void reloadAllData(List<WordNature> natures) {
// 1. reload custom knowledge
try {
HanlpHelper.reloadCustomDictionary();
} catch (Exception e) {
logger.error("reloadCustomDictionary error", e);
}
// 2. update online knowledge
updateOnlineKnowledge(natures);
}
public void updateOnlineKnowledge(List<WordNature> natures) {
try {
updateSemanticKnowledge(natures);
} catch (Exception e) {
logger.error("updateSemanticKnowledge error", e);
}
}
}


@@ -1,30 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.google.common.collect.Lists;
import com.tencent.supersonic.common.nlp.ItemDO;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* dimension value wordNature
*/
@Service
@Slf4j
public class ValueWordNature extends BaseWordNature {
@Override
public List<WordNature> getWordNature(String word, ItemDO itemDO) {
List<WordNature> result = Lists.newArrayList();
WordNature wordNature = new WordNature();
wordNature.setWord(word);
Integer domain = itemDO.getDomain();
String nature = NatureType.NATURE_SPILT + domain + NatureType.NATURE_SPILT + itemDO.getItemId();
wordNature.setNatureWithFrequency(String.format("%s 100000", nature));
result.add(wordNature);
return result;
}
}


@@ -1,28 +0,0 @@
package com.tencent.supersonic.knowledge.application.online;
import com.tencent.supersonic.common.nlp.NatureType;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* WordNature Strategy Factory
*/
public class WordNatureStrategyFactory {
private static Map<NatureType, BaseWordNature> strategyFactory = new ConcurrentHashMap<>();
static {
strategyFactory.put(NatureType.DIMENSION, new DimensionWordNature());
strategyFactory.put(NatureType.METRIC, new MetricWordNature());
strategyFactory.put(NatureType.DOMAIN, new DomainWordNature());
strategyFactory.put(NatureType.ENTITY, new EntityWordNature());
strategyFactory.put(NatureType.VALUE, new ValueWordNature());
}
public static BaseWordNature get(NatureType strategyType) {
return strategyFactory.get(strategyType);
}
}


@@ -1,6 +1,7 @@
package com.tencent.supersonic.knowledge.domain.pojo;
package com.tencent.supersonic.knowledge.dictionary;
import java.util.List;
import lombok.Data;


@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.domain.pojo;
package com.tencent.supersonic.knowledge.dictionary;
public class DictTaskFilter {


@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.domain.pojo;
package com.tencent.supersonic.knowledge.dictionary;
public enum DictUpdateMode {


@@ -0,0 +1,34 @@
package com.tencent.supersonic.knowledge.dictionary;
import java.util.Objects;
import lombok.Data;
import lombok.ToString;
/***
* word nature
*/
@Data
@ToString
public class DictWord {
private String word;
private String nature;
private String natureWithFrequency;
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
DictWord that = (DictWord) o;
return Objects.equals(word, that.word) && Objects.equals(natureWithFrequency, that.natureWithFrequency);
}
@Override
public int hashCode() {
return Objects.hash(word, natureWithFrequency);
}
}
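
As a quick illustration (not part of the commit): equality here is decided by word and natureWithFrequency only, which is what the CollectionUtils.isEqualCollection check in ApplicationStartedInit above relies on when deciding whether the dictionary changed. A minimal sketch:

import com.tencent.supersonic.knowledge.dictionary.DictWord;

public class DictWordEqualityExample {
    public static void main(String[] args) {
        DictWord first = new DictWord();
        first.setWord("pv");
        first.setNature("_3_15_metric");
        first.setNatureWithFrequency("_3_15_metric 100000");

        DictWord second = new DictWord();
        second.setWord("pv");
        second.setNature("something else entirely"); // nature is ignored by equals/hashCode
        second.setNatureWithFrequency("_3_15_metric 100000");

        System.out.println(first.equals(second)); // true: only word and natureWithFrequency are compared
    }
}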


@@ -0,0 +1,55 @@
package com.tencent.supersonic.knowledge.dictionary;
import org.apache.commons.lang3.StringUtils;
/***
* nature type
* such as : metric、dimension etc.
*/
public enum DictWordType {
METRIC("metric"),
DIMENSION("dimension"),
VALUE("value"),
DOMAIN("dm"),
ENTITY("entity"),
NUMBER("m"),
SUFFIX("suffix");
public static final String NATURE_SPILT = "_";
public static final String SPACE = " ";
private String type;
DictWordType(String type) {
this.type = type;
}
public String getType() {
return NATURE_SPILT + type;
}
public static DictWordType getNatureType(String nature) {
if (StringUtils.isEmpty(nature) || !nature.startsWith(NATURE_SPILT)) {
return null;
}
for (DictWordType dictWordType : values()) {
if (nature.endsWith(dictWordType.getType())) {
return dictWordType;
}
}
//domain
String[] natures = nature.split(DictWordType.NATURE_SPILT);
if (natures.length == 2 && StringUtils.isNumeric(natures[1])) {
return DOMAIN;
}
//dimension value
if (natures.length == 3 && StringUtils.isNumeric(natures[1]) && StringUtils.isNumeric(natures[2])) {
return VALUE;
}
return null;
}
}
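
To make the nature-string convention concrete, here is a minimal illustrative sketch (not part of the commit) of how getNatureType classifies a few values, assuming the enum exactly as defined above:

import com.tencent.supersonic.knowledge.dictionary.DictWordType;

public class DictWordTypeExample {
    public static void main(String[] args) {
        // "_3_15_metric" ends with "_metric" -> METRIC
        System.out.println(DictWordType.getNatureType("_3_15_metric"));
        // "_3" splits into two segments with a numeric second part -> DOMAIN
        System.out.println(DictWordType.getNatureType("_3"));
        // "_3_15" splits into three segments, both numeric -> VALUE (a dimension value)
        System.out.println(DictWordType.getNatureType("_3_15"));
        // natures that do not start with "_" are rejected -> null
        System.out.println(DictWordType.getNatureType("metric"));
    }
}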


@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.infrastructure.nlp;
package com.tencent.supersonic.knowledge.dictionary;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;


@@ -1,9 +1,11 @@
package com.tencent.supersonic.knowledge.domain.pojo;
package com.tencent.supersonic.knowledge.dictionary;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.tencent.supersonic.knowledge.dictionary.DictUpdateMode;
import lombok.Data;
@Data


@@ -1,7 +1,7 @@
package com.tencent.supersonic.knowledge.domain.pojo;
package com.tencent.supersonic.knowledge.dictionary;
import com.tencent.supersonic.common.enums.TaskStatusEnum;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import java.util.Date;
import lombok.Data;


@@ -1,7 +1,7 @@
package com.tencent.supersonic.knowledge.domain.pojo;
package com.tencent.supersonic.knowledge.dictionary;
import com.tencent.supersonic.common.enums.TypeEnums;
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import java.util.List;
import javax.validation.constraints.NotNull;
@@ -23,4 +23,4 @@ public class DimValueInfo {
private List<String> whiteList;
private List<String> ruleList;
private Boolean isDictInfo;
}
}


@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.domain;
package com.tencent.supersonic.knowledge.dictionary;
import java.util.List;
@@ -12,15 +12,6 @@ public interface FileHandler {
*/
void backupFile(String fileName);
/**
* move files to a specific directory
* not backup
*
* @param fileName
* @param targetDirectory
*/
void moveFile(String fileName, String targetDirectory);
/**
* create a directory
*


@@ -1,23 +1,22 @@
package com.tencent.supersonic.knowledge.infrastructure.nlp;
package com.tencent.supersonic.knowledge.dictionary;
import com.hankcs.hanlp.corpus.io.IIOAdapter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Slf4j
public class HadoopFileIOAdapter implements IIOAdapter {
private static final Logger LOGGER = LoggerFactory.getLogger(HadoopFileIOAdapter.class);
@Override
public InputStream open(String path) throws IOException {
LOGGER.info("open:{}", path);
log.info("open:{}", path);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(path), conf);
return fs.open(new Path(path));
@@ -25,7 +24,7 @@ public class HadoopFileIOAdapter implements IIOAdapter {
@Override
public OutputStream create(String path) throws IOException {
LOGGER.info("create:{}", path);
log.info("create:{}", path);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(path), conf);
return fs.create(new Path(path));


@@ -1,6 +1,6 @@
package com.tencent.supersonic.knowledge.domain;
package com.tencent.supersonic.knowledge.dictionary;
import com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import java.io.FileNotFoundException;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;


@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.domain;
package com.tencent.supersonic.knowledge.dictionary;
import java.io.BufferedWriter;
@@ -44,18 +44,6 @@ public class LocalFileHandler implements FileHandler {
}
@Override
public void moveFile(String filePath, String targetDirectoryPath) {
Path sourcePath = Paths.get(filePath);
Path targetPath = Paths.get(targetDirectoryPath, sourcePath.getFileName().toString());
try {
Files.move(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
log.info("File moved successfully!");
} catch (IOException e) {
log.info("Failed to move file: " + e.getMessage());
}
}
@Override
public void createDir(String directoryPath) {
Path path = Paths.get(directoryPath);
@@ -136,4 +124,4 @@ public class LocalFileHandler implements FileHandler {
}
return Files.newBufferedWriter(Paths.get(filePath), StandardCharsets.UTF_8);
}
}
}


@@ -0,0 +1,52 @@
package com.tencent.supersonic.knowledge.dictionary;
import java.io.Serializable;
import java.util.List;
import java.util.Objects;
import lombok.Data;
import lombok.ToString;
@Data
@ToString
public class MapResult implements Serializable {
private String name;
private List<String> natures;
private int offset = 0;
private double similarity;
private String detectWord;
public MapResult() {
}
public MapResult(String name, List<String> natures, String detectWord) {
this.name = name;
this.natures = natures;
this.detectWord = detectWord;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
MapResult that = (MapResult) o;
return Objects.equals(name, that.name) && Objects.equals(natures, that.natures);
}
@Override
public int hashCode() {
return Objects.hash(name, natures);
}
public void setOffset(int offset) {
this.offset = offset;
}
}


@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.infrastructure.nlp;
package com.tencent.supersonic.knowledge.dictionary;
import static com.hankcs.hanlp.utility.Predefine.logger;
@@ -14,6 +14,8 @@ import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.utility.LexiconUtility;
import com.hankcs.hanlp.utility.Predefine;
import com.hankcs.hanlp.utility.TextUtility;
import com.tencent.supersonic.knowledge.service.SearchService;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
@@ -109,13 +111,13 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
attribute = DictionaryAttributeUtil.getAttribute(map.get(word), attribute);
map.put(word, attribute);
if (addToSuggeterTrie) {
Suggester.put(word, attribute);
SearchService.put(word, attribute);
}
} else {
map.put(word, attribute);
if (addToSuggeterTrie) {
Suggester.put(word, attribute);
SearchService.put(word, attribute);
}
}
}
@@ -125,6 +127,20 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
}
}
public boolean load(String... path) {
this.path = path;
long start = System.currentTimeMillis();
if (!this.loadMainDictionary(path[0])) {
Predefine.logger.warning("自定义词典" + Arrays.toString(path) + "加载失败");
return false;
} else {
Predefine.logger.info(
"自定义词典加载成功:" + this.dat.size() + "个词条,耗时" + (System.currentTimeMillis() - start) + "ms");
this.path = path;
return true;
}
}
/***
* load main dictionary
* @param mainPath
@@ -176,7 +192,7 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
logger.info("正在构建DoubleArrayTrie……");
dat.build(map);
if (addToSuggestTrie) {
// Suggester.save();
// SearchService.save();
}
if (isCache) {
// 缓存成dat文件下次加载会快很多
@@ -219,6 +235,10 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
}
}
public boolean loadMainDictionary(String mainPath) {
return loadMainDictionary(mainPath, this.path, this.dat, true, addToSuggesterTrie);
}
public static boolean loadDat(String path, DoubleArrayTrie<CoreDictionary.Attribute> dat) {
return loadDat(path, HanLP.Config.CustomDictionaryPath, dat);
}
@@ -303,24 +323,6 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
return word;
}
public boolean load(String... path) {
this.path = path;
long start = System.currentTimeMillis();
if (!this.loadMainDictionary(path[0])) {
Predefine.logger.warning("自定义词典" + Arrays.toString(path) + "加载失败");
return false;
} else {
Predefine.logger.info(
"自定义词典加载成功:" + this.dat.size() + "个词条,耗时" + (System.currentTimeMillis() - start) + "ms");
this.path = path;
return true;
}
}
public boolean loadMainDictionary(String mainPath) {
return loadMainDictionary(mainPath, this.path, this.dat, true, addToSuggesterTrie);
}
public boolean reload() {
if (this.path != null && this.path.length != 0) {
IOUtil.deleteFile(this.path[0] + ".bin");
@@ -368,7 +370,7 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
// return true;
}
if (addToSuggesterTrie) {
Suggester.put(word, att);
SearchService.put(word, att);
}
return true;
}


@@ -0,0 +1,49 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import java.util.ArrayList;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import lombok.extern.slf4j.Slf4j;
/**
* base word nature
*/
@Slf4j
public abstract class BaseWordBuilder {
public static final Long DEFAULT_FREQUENCY = 100000L;
public List<DictWord> getDictWords(List<SchemaElement> schemaElements) {
List<DictWord> dictWords = new ArrayList<>();
try {
dictWords = getDictWordsWithException(schemaElements);
} catch (Exception e) {
log.error("getWordNatureList error,", e);
}
return dictWords;
}
protected List<DictWord> getDictWordsWithException(List<SchemaElement> schemaElements) {
List<DictWord> dictWords = new ArrayList<>();
for (SchemaElement schemaElement : schemaElements) {
dictWords.addAll(doGet(schemaElement.getName(), schemaElement));
}
return dictWords;
}
protected abstract List<DictWord> doGet(String word, SchemaElement schemaElement);
public Long getElementID(String nature) {
String[] split = nature.split(DictWordType.NATURE_SPILT);
if (split.length >= 3) {
return Long.valueOf(split[2]);
}
return 0L;
}
}


@@ -0,0 +1,51 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
* dimension word nature
*/
@Service
public class DimensionWordBuilder extends BaseWordBuilder {
@Value("${nlp.dimension.use.suffix:true}")
private boolean nlpDimensionUseSuffix = true;
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
result.add(getOnwWordNature(word, schemaElement, false));
if (nlpDimensionUseSuffix) {
String reverseWord = StringUtils.reverse(word);
if (StringUtils.isNotEmpty(word) && !word.equalsIgnoreCase(reverseWord)) {
result.add(getOnwWordNature(reverseWord, schemaElement, true));
}
}
return result;
}
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long domainId = schemaElement.getDomain();
String nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.DIMENSION.getType();
if (isSuffix) {
nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType();
}
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
return dictWord;
}
}


@@ -0,0 +1,32 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* domain word nature
*/
@Service
@Slf4j
public class DomainWordBuilder extends BaseWordBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long domainId = schemaElement.getDomain();
String nature = DictWordType.NATURE_SPILT + domainId;
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
result.add(dictWord);
return result;
}
}


@@ -0,0 +1,33 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* dimension value wordNature
*/
@Service
@Slf4j
public class EntityWordBuilder extends BaseWordBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long domain = schemaElement.getDomain();
String nature = DictWordType.NATURE_SPILT + domain + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.ENTITY.getType();
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
result.add(dictWord);
return result;
}
}


@@ -0,0 +1,50 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
* Metric DictWord
*/
@Service
public class MetricWordBuilder extends BaseWordBuilder {
@Value("${nlp.metric.use.suffix:true}")
private boolean nlpMetricUseSuffix = true;
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
result.add(getOnwWordNature(word, schemaElement, false));
if (nlpMetricUseSuffix) {
String reverseWord = StringUtils.reverse(word);
if (!word.equalsIgnoreCase(reverseWord)) {
result.add(getOnwWordNature(reverseWord, schemaElement, true));
}
}
return result;
}
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
DictWord dictWord = new DictWord();
dictWord.setWord(word);
Long domainId = schemaElement.getDomain();
String nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.METRIC.getType();
if (isSuffix) {
nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId()
+ DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType();
}
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
return dictWord;
}
}


@@ -0,0 +1,41 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.Objects;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
/**
* dimension value wordNature
*/
@Service
@Slf4j
public class ValueWordBuilder extends BaseWordBuilder {
@Override
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
List<DictWord> result = Lists.newArrayList();
if (Objects.nonNull(schemaElement) && !CollectionUtils.isEmpty(schemaElement.getAlias())) {
schemaElement.getAlias().stream().forEach(value -> {
DictWord dictWord = new DictWord();
Long domainId = schemaElement.getDomain();
String nature = DictWordType.NATURE_SPILT + domainId + DictWordType.NATURE_SPILT + schemaElement.getId();
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
dictWord.setWord(value);
result.add(dictWord);
});
}
log.debug("ValueWordBuilder, result:{}", result);
return result;
}
}
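
For contrast with the builders above, a small sketch (illustrative only, not part of the commit) showing that the value builder ignores the incoming word and emits one entry per alias carried by the SchemaElement; the element ids used are hypothetical:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.builder.ValueWordBuilder;

public class ValueWordBuilderExample {
    public static void main(String[] args) {
        // hypothetical dimension (domain 3, element id 7) carrying two dimension-value aliases
        SchemaElement dimension = SchemaElement.builder()
                .domain(3L)
                .id(7L)
                .name("city")
                .type(SchemaElementType.VALUE)
                .alias(new ArrayList<>(Arrays.asList("beijing", "shanghai")))
                .build();

        // the word argument is ignored; one DictWord per alias, each with nature "_3_7"
        List<DictWord> words = new ValueWordBuilder().doGet("city", dimension);
        words.forEach(w -> System.out.println(w.getWord() + " -> " + w.getNatureWithFrequency()));
    }
}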


@@ -0,0 +1,28 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.builder.*;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* DictWord Strategy Factory
*/
public class WordBuilderFactory {
private static Map<DictWordType, BaseWordBuilder> wordNatures = new ConcurrentHashMap<>();
static {
wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder());
wordNatures.put(DictWordType.METRIC, new MetricWordBuilder());
wordNatures.put(DictWordType.DOMAIN, new DomainWordBuilder());
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
}
public static BaseWordBuilder get(DictWordType strategyType) {
return wordNatures.get(strategyType);
}
}
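
A minimal usage sketch of the factory (illustrative only, not part of the commit), assuming the SchemaElement builder shown later in this diff and the default suffix setting; the domain and element ids are hypothetical:

import java.util.Collections;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.builder.BaseWordBuilder;
import com.tencent.supersonic.knowledge.dictionary.builder.WordBuilderFactory;

public class WordBuilderFactoryExample {
    public static void main(String[] args) {
        // hypothetical metric element: domain 3, element id 15
        SchemaElement metric = SchemaElement.builder()
                .domain(3L)
                .id(15L)
                .name("pv")
                .bizName("page_view")
                .type(SchemaElementType.METRIC)
                .build();

        BaseWordBuilder builder = WordBuilderFactory.get(DictWordType.METRIC);
        List<DictWord> words = builder.getDictWords(Collections.singletonList(metric));
        // with the default nlp.metric.use.suffix=true this yields "pv" with nature
        // "_3_15_metric" plus the reversed suffix entry "vp" with "_3_15_suffix_metric"
        words.forEach(w -> System.out.println(w.getWord() + " -> " + w.getNatureWithFrequency()));
    }
}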


@@ -1,59 +0,0 @@
package com.tencent.supersonic.knowledge.domain.converter;
import com.google.common.base.Strings;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.enums.TaskStatusEnum;
import com.tencent.supersonic.common.util.json.JsonUtil;
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
import com.tencent.supersonic.knowledge.domain.pojo.DictConfig;
import com.tencent.supersonic.knowledge.domain.pojo.DimValue2DictCommand;
import com.tencent.supersonic.knowledge.domain.pojo.DimValueInfo;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.List;
public class DictTaskConverter {
private static String dateTimeFormatter = "yyyyMMddHHmmss";
public static DimValueDictTaskPO generateDimValueDictTaskPO(DimValue2DictCommand dimValue2DictCommend, User user) {
DimValueDictTaskPO taskPO = new DimValueDictTaskPO();
Date createAt = new Date();
String date = DateTimeFormatter.ofPattern(dateTimeFormatter)
.format(createAt.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime());
String creator = Strings.isNullOrEmpty(user.getName()) ? "" : user.getName();
String updateMode = dimValue2DictCommend.getUpdateMode().getValue();
String name = String.format("DimValue_dic_%s_%s_%s", updateMode, creator, date);
taskPO.setName(name);
taskPO.setCreatedAt(createAt);
taskPO.setCommand(JsonUtil.toString(dimValue2DictCommend));
taskPO.setStatus(TaskStatusEnum.RUNNING.getCode());
taskPO.setCreatedBy(creator);
return taskPO;
}
public static DictConfPO generateDictConfPO(DictConfig dictConfig, User user) {
DictConfPO dictConfPO = new DictConfPO();
dictConfPO.setDimValueInfos(JsonUtil.toString(dictConfig.getDimValueInfoList()));
dictConfPO.setDomainId(dictConfig.getDomainId());
dictConfPO.setCreatedBy(user.getName());
dictConfPO.setUpdatedBy(user.getName());
dictConfPO.setCreatedAt(new Date());
dictConfPO.setUpdatedAt(new Date());
return dictConfPO;
}
public static DictConfig dictConfPO2Config(DictConfPO dictConfPO) {
DictConfig dictConfig = new DictConfig();
dictConfig.setDomainId(dictConfPO.getDomainId());
List<DimValueInfo> dimValueInfos = JsonUtil.toList(dictConfPO.getDimValueInfos(), DimValueInfo.class);
dictConfig.setDimValueInfoList(dimValueInfos);
return dictConfig;
}
}


@@ -1,25 +0,0 @@
package com.tencent.supersonic.knowledge.domain.repository;
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
import com.tencent.supersonic.knowledge.domain.pojo.DictConfig;
import com.tencent.supersonic.knowledge.domain.pojo.DictTaskFilter;
import com.tencent.supersonic.knowledge.domain.pojo.DimValueDictInfo;
import java.util.List;
public interface DictRepository {
Long createDimValueDictTask(DimValueDictTaskPO dimValueDictTaskPO);
Boolean updateDictTaskStatus(Integer status, DimValueDictTaskPO dimValueDictTaskPO);
List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter);
Boolean createDictConf(DictConfPO dictConfPO);
Boolean editDictConf(DictConfPO dictConfPO);
Boolean upsertDictInfo(DictConfPO dictConfPO);
DictConfig getDictInfoByDomainId(Long domainId);
}


@@ -1,17 +0,0 @@
package com.tencent.supersonic.knowledge.domain.service;
import com.tencent.supersonic.common.nlp.WordNature;
import java.util.List;
/**
* online knowledge service interface
*/
public interface OnlineKnowledgeService {
void updateSemanticKnowledge(List<WordNature> natures);
void reloadAllData(List<WordNature> natures);
void updateOnlineKnowledge(List<WordNature> natures);
}


@@ -1,17 +0,0 @@
package com.tencent.supersonic.knowledge.infrastructure.custom;
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
import org.apache.ibatis.annotations.Mapper;
@Mapper
public interface DictConfMapper {
Boolean createDictConf(DictConfPO dictConfPO);
Boolean editDictConf(DictConfPO dictConfPO);
Boolean upsertDictInfo(DictConfPO dictConfPO);
DictConfPO getDictInfoByDomainId(Long domainId);
}


@@ -1,16 +0,0 @@
package com.tencent.supersonic.knowledge.infrastructure.custom;
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
import com.tencent.supersonic.knowledge.domain.pojo.DictTaskFilter;
import java.util.List;
import org.apache.ibatis.annotations.Mapper;
@Mapper
public interface DictTaskMapper {
Long createDimValueTask(DimValueDictTaskPO dimValueDictTaskPO);
Boolean updateTaskStatus(DimValueDictTaskPO dimValueDictTaskPO);
List<DimValueDictTaskPO> searchDictTaskList(DictTaskFilter filter);
}


@@ -1,93 +0,0 @@
package com.tencent.supersonic.knowledge.infrastructure.repository;
import com.tencent.supersonic.common.enums.TaskStatusEnum;
import com.tencent.supersonic.knowledge.domain.converter.DictTaskConverter;
import com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO;
import com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO;
import com.tencent.supersonic.knowledge.domain.pojo.DictConfig;
import com.tencent.supersonic.knowledge.domain.pojo.DictTaskFilter;
import com.tencent.supersonic.knowledge.domain.pojo.DimValueDictInfo;
import com.tencent.supersonic.knowledge.domain.repository.DictRepository;
import com.tencent.supersonic.knowledge.infrastructure.custom.DictConfMapper;
import com.tencent.supersonic.knowledge.infrastructure.custom.DictTaskMapper;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Repository;
import org.springframework.util.CollectionUtils;
@Repository
public class DictRepositoryImpl implements DictRepository {
private final DictTaskMapper dictTaskMapper;
private final DictConfMapper dictConfMapper;
public DictRepositoryImpl(DictTaskMapper dictTaskMapper,
DictConfMapper dictConfMapper) {
this.dictTaskMapper = dictTaskMapper;
this.dictConfMapper = dictConfMapper;
}
@Override
public Long createDimValueDictTask(DimValueDictTaskPO dimValueDictTaskPO) {
dictTaskMapper.createDimValueTask(dimValueDictTaskPO);
return dimValueDictTaskPO.getId();
}
@Override
public Boolean updateDictTaskStatus(Integer status, DimValueDictTaskPO dimValueDictTaskPO) {
dimValueDictTaskPO.setStatus(status);
Date createdAt = dimValueDictTaskPO.getCreatedAt();
long elapsedMs = System.currentTimeMillis() - createdAt.getTime();
dimValueDictTaskPO.setElapsedMs(elapsedMs);
CompletableFuture.supplyAsync(() -> {
dictTaskMapper.updateTaskStatus(dimValueDictTaskPO);
return null;
});
return true;
}
@Override
public List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter) {
List<DimValueDictInfo> dimValueDictDescList = new ArrayList<>();
List<DimValueDictTaskPO> dimValueDictTaskPOList = dictTaskMapper.searchDictTaskList(filter);
if (!CollectionUtils.isEmpty(dimValueDictTaskPOList)) {
dimValueDictTaskPOList.stream().forEach(dictTaskPO -> {
DimValueDictInfo dimValueDictDesc = new DimValueDictInfo();
BeanUtils.copyProperties(dictTaskPO, dimValueDictDesc);
dimValueDictDesc.setStatus(TaskStatusEnum.of(dictTaskPO.getStatus()));
dimValueDictDescList.add(dimValueDictDesc);
});
}
return dimValueDictDescList;
}
@Override
public Boolean createDictConf(DictConfPO dictConfPO) {
return dictConfMapper.createDictConf(dictConfPO);
}
@Override
public Boolean editDictConf(DictConfPO dictConfPO) {
return dictConfMapper.editDictConf(dictConfPO);
}
@Override
public Boolean upsertDictInfo(DictConfPO dictConfPO) {
return dictConfMapper.upsertDictInfo(dictConfPO);
}
@Override
public DictConfig getDictInfoByDomainId(Long domainId) {
DictConfPO dictConfPO = dictConfMapper.getDictInfoByDomainId(domainId);
if (Objects.isNull(dictConfPO)) {
return null;
}
return DictTaskConverter.dictConfPO2Config(dictConfPO);
}
}


@@ -1,10 +1,10 @@
package com.tencent.supersonic.knowledge.domain.dataobject;
package com.tencent.supersonic.knowledge.persistence.dataobject;
import java.util.Date;
import lombok.Data;
@Data
public class DictConfPO {
public class DictConfDO {
private Long id;


@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.domain.dataobject;
package com.tencent.supersonic.knowledge.persistence.dataobject;
import java.util.Date;
import lombok.Data;
@@ -7,7 +7,7 @@ import org.apache.commons.codec.digest.DigestUtils;
@Data
@ToString
public class DimValueDictTaskPO {
public class DictTaskDO {
private Long id;


@@ -0,0 +1,17 @@
package com.tencent.supersonic.knowledge.persistence.mapper;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
import org.apache.ibatis.annotations.Mapper;
@Mapper
public interface DictConfMapper {
Boolean createDictConf(DictConfDO dictConfDO);
Boolean editDictConf(DictConfDO dictConfDO);
Boolean upsertDictInfo(DictConfDO dictConfDO);
DictConfDO getDictInfoByDomainId(Long domainId);
}


@@ -0,0 +1,16 @@
package com.tencent.supersonic.knowledge.persistence.mapper;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
import java.util.List;
import org.apache.ibatis.annotations.Mapper;
@Mapper
public interface DictTaskMapper {
Long createDimValueTask(DictTaskDO dictTaskDO);
Boolean updateTaskStatus(DictTaskDO dictTaskDO);
List<DictTaskDO> searchDictTaskList(DictTaskFilter filter);
}


@@ -0,0 +1,19 @@
package com.tencent.supersonic.knowledge.persistence.repository;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
import com.tencent.supersonic.knowledge.dictionary.DimValueDictInfo;
import java.util.List;
public interface DictRepository {
Long createDimValueDictTask(DictTaskDO dictTaskDO);
Boolean updateDictTaskStatus(Integer status, DictTaskDO dictTaskDO);
List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter);
DictConfig getDictInfoByDomainId(Long domainId);
}


@@ -0,0 +1,77 @@
package com.tencent.supersonic.knowledge.persistence.repository;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.knowledge.utils.DictTaskConverter;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
import com.tencent.supersonic.knowledge.dictionary.DimValueDictInfo;
import com.tencent.supersonic.knowledge.persistence.mapper.DictConfMapper;
import com.tencent.supersonic.knowledge.persistence.mapper.DictTaskMapper;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Repository;
import org.springframework.util.CollectionUtils;
@Repository
public class DictRepositoryImpl implements DictRepository {
private final DictTaskMapper dictTaskMapper;
private final DictConfMapper dictConfMapper;
public DictRepositoryImpl(DictTaskMapper dictTaskMapper,
DictConfMapper dictConfMapper) {
this.dictTaskMapper = dictTaskMapper;
this.dictConfMapper = dictConfMapper;
}
@Override
public Long createDimValueDictTask(DictTaskDO dictTaskDO) {
dictTaskMapper.createDimValueTask(dictTaskDO);
return dictTaskDO.getId();
}
@Override
public Boolean updateDictTaskStatus(Integer status, DictTaskDO dictTaskDO) {
dictTaskDO.setStatus(status);
Date createdAt = dictTaskDO.getCreatedAt();
long elapsedMs = System.currentTimeMillis() - createdAt.getTime();
dictTaskDO.setElapsedMs(elapsedMs);
CompletableFuture.supplyAsync(() -> {
dictTaskMapper.updateTaskStatus(dictTaskDO);
return null;
});
return true;
}
@Override
public List<DimValueDictInfo> searchDictTaskList(DictTaskFilter filter) {
List<DimValueDictInfo> dimValueDictDescList = new ArrayList<>();
List<DictTaskDO> dictTaskDOList = dictTaskMapper.searchDictTaskList(filter);
if (!CollectionUtils.isEmpty(dictTaskDOList)) {
dictTaskDOList.stream().forEach(dictTaskPO -> {
DimValueDictInfo dimValueDictDesc = new DimValueDictInfo();
BeanUtils.copyProperties(dictTaskPO, dimValueDictDesc);
dimValueDictDesc.setStatus(TaskStatusEnum.of(dictTaskPO.getStatus()));
dimValueDictDescList.add(dimValueDictDesc);
});
}
return dimValueDictDescList;
}
@Override
public DictConfig getDictInfoByDomainId(Long domainId) {
DictConfDO dictConfDO = dictConfMapper.getDictInfoByDomainId(domainId);
if (Objects.isNull(dictConfDO)) {
return null;
}
return DictTaskConverter.dictConfPO2Config(dictConfDO);
}
}


@@ -0,0 +1,106 @@
package com.tencent.supersonic.knowledge.semantic;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.tencent.supersonic.chat.api.component.SemanticLayer;
import com.tencent.supersonic.chat.api.pojo.DomainSchema;
import com.tencent.supersonic.common.pojo.Aggregator;
import com.tencent.supersonic.common.pojo.Order;
import com.tencent.supersonic.common.pojo.ResultData;
import com.tencent.supersonic.semantic.api.model.response.DomainSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.util.CollectionUtils;
@Slf4j
public abstract class BaseSemanticLayer implements SemanticLayer {
protected final Cache<String, List<DomainSchemaResp>> domainSchemaCache =
CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.SECONDS).build();
protected ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>> structTypeRef =
new ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>>() {
};
@SneakyThrows
public List<DomainSchemaResp> fetchDomainSchema(List<Long> ids, Boolean cacheEnable) {
if (cacheEnable) {
return domainSchemaCache.get(String.valueOf(ids), () -> {
List<DomainSchemaResp> data = doFetchDomainSchema(ids);
return data;
});
}
List<DomainSchemaResp> data = doFetchDomainSchema(ids);
return data;
}
@Override
public DomainSchema getDomainSchema(Long domain, Boolean cacheEnable) {
List<Long> ids = new ArrayList<>();
ids.add(domain);
List<DomainSchemaResp> domainSchemaResps = fetchDomainSchema(ids, cacheEnable);
if (!CollectionUtils.isEmpty(domainSchemaResps)) {
Optional<DomainSchemaResp> domainSchemaResp = domainSchemaResps.stream()
.filter(d -> d.getId().equals(domain)).findFirst();
if (domainSchemaResp.isPresent()) {
DomainSchemaResp domainSchema = domainSchemaResp.get();
return DomainSchemaBuilder.build(domainSchema);
}
}
return null;
}
@Override
public List<DomainSchema> getDomainSchema() {
return getDomainSchema(new ArrayList<>());
}
@Override
public List<DomainSchema> getDomainSchema(List<Long> ids) {
List<DomainSchema> domainSchemaList = new ArrayList<>();
for(DomainSchemaResp resp : fetchDomainSchema(ids, true)) {
domainSchemaList.add(DomainSchemaBuilder.build(resp));
}
return domainSchemaList;
}
protected void deletionDuplicated(QueryStructReq queryStructReq) {
if (!CollectionUtils.isEmpty(queryStructReq.getGroups()) && queryStructReq.getGroups().size() > 1) {
Set<String> groups = new HashSet<>();
groups.addAll(queryStructReq.getGroups());
queryStructReq.getGroups().clear();
queryStructReq.getGroups().addAll(groups);
}
}
protected void onlyQueryFirstMetric(QueryStructReq queryStructReq) {
if (!CollectionUtils.isEmpty(queryStructReq.getAggregators()) && queryStructReq.getAggregators().size() > 1) {
log.info("multi metric in aggregators:{} , only query first one", queryStructReq.getAggregators());
List<Aggregator> aggregators = queryStructReq.getAggregators().subList(0, 1);
List<String> excludeAggregators = queryStructReq.getAggregators().stream().map(a -> a.getColumn())
.filter(a -> !a.equals(aggregators.get(0).getColumn())).collect(
Collectors.toList());
queryStructReq.setAggregators(aggregators);
List<Order> orders = queryStructReq.getOrders().stream()
.filter(o -> !excludeAggregators.contains(o.getColumn())).collect(
Collectors.toList());
log.info("multi metric in orders:{} ", queryStructReq.getOrders());
queryStructReq.setOrders(orders);
}
}
protected abstract List<DomainSchemaResp> doFetchDomainSchema(List<Long> ids);
}


@@ -0,0 +1,40 @@
package com.tencent.supersonic.knowledge.semantic;
import lombok.Data;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.client.RestTemplate;
@Configuration
@Data
public class DefaultSemanticConfig {
@Value("${semantic.url.prefix:http://localhost:8081}")
private String semanticUrl;
@Value("${searchByStruct.path:/api/semantic/query/struct}")
private String searchByStructPath;
@Value("${searchByStruct.path:/api/semantic/query/multiStruct}")
private String searchByMultiStructPath;
@Value("${searchByStruct.path:/api/semantic/query/sql}")
private String searchBySqlPath;
@Value("${fetchDomainSchemaPath.path:/api/semantic/schema}")
private String fetchDomainSchemaPath;
@Value("${fetchDomainList.path:/api/semantic/schema/dimension/page}")
private String fetchDimensionPagePath;
@Value("${fetchDomainList.path:/api/semantic/schema/metric/page}")
private String fetchMetricPagePath;
@Value("${fetchDomainList.path:/api/semantic/schema/domain/list}")
private String fetchDomainListPath;
@Value("${fetchDomainList.path:/api/semantic/schema/domain/view/list}")
private String fetchDomainViewListPath;
}


@@ -0,0 +1,119 @@
package com.tencent.supersonic.knowledge.semantic;
import com.tencent.supersonic.chat.api.pojo.DomainSchema;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.semantic.api.model.pojo.DimValueMap;
import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.DomainSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.util.Strings;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;
import java.util.*;
public class DomainSchemaBuilder {
public static DomainSchema build(DomainSchemaResp resp) {
DomainSchema domainSchema = new DomainSchema();
SchemaElement domain = SchemaElement.builder()
.domain(resp.getId())
.id(resp.getId())
.name(resp.getName())
.bizName(resp.getBizName())
.type(SchemaElementType.DOMAIN)
.build();
domainSchema.setDomain(domain);
Set<SchemaElement> metrics = new HashSet<>();
for (MetricSchemaResp metric : resp.getMetrics()) {
SchemaElement metricToAdd = SchemaElement.builder()
.domain(resp.getId())
.id(metric.getId())
.name(metric.getName())
.bizName(metric.getBizName())
.type(SchemaElementType.METRIC)
.useCnt(metric.getUseCnt())
.build();
metrics.add(metricToAdd);
String alias = metric.getAlias();
if (StringUtils.isNotEmpty(alias)) {
SchemaElement alisMetricToAdd = new SchemaElement();
BeanUtils.copyProperties(metricToAdd, alisMetricToAdd);
alisMetricToAdd.setName(alias);
metrics.add(alisMetricToAdd);
}
}
domainSchema.getMetrics().addAll(metrics);
Set<SchemaElement> dimensions = new HashSet<>();
Set<SchemaElement> dimensionValues = new HashSet<>();
for (DimSchemaResp dim : resp.getDimensions()) {
Set<String> dimValueAlias = new HashSet<>();
if (!CollectionUtils.isEmpty(dim.getDimValueMaps())) {
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
for (DimValueMap dimValueMap : dimValueMaps) {
if (Strings.isNotEmpty(dimValueMap.getBizName())) {
dimValueAlias.add(dimValueMap.getBizName());
}
if (!CollectionUtils.isEmpty(dimValueMap.getAlias())) {
dimValueAlias.addAll(dimValueMap.getAlias());
}
}
}
SchemaElement dimToAdd = SchemaElement.builder()
.domain(resp.getId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.DIMENSION)
.useCnt(dim.getUseCnt())
.build();
dimensions.add(dimToAdd);
String alias = dim.getAlias();
if (StringUtils.isNotEmpty(alias)) {
SchemaElement alisDimToAdd = new SchemaElement();
BeanUtils.copyProperties(dimToAdd, alisDimToAdd);
alisDimToAdd.setName(alias);
dimensions.add(alisDimToAdd);
}
SchemaElement dimValueToAdd = SchemaElement.builder()
.domain(resp.getId())
.id(dim.getId())
.name(dim.getName())
.bizName(dim.getBizName())
.type(SchemaElementType.VALUE)
.useCnt(dim.getUseCnt())
.alias(new ArrayList<>(Arrays.asList(dimValueAlias.toArray(new String[0]))))
.build();
dimensionValues.add(dimValueToAdd);
}
domainSchema.getDimensions().addAll(dimensions);
domainSchema.getDimensionValues().addAll(dimensionValues);
if (!CollectionUtils.isEmpty(resp.getEntityNames())) {
Set<SchemaElement> entities = new HashSet<>();
for (String entity : resp.getEntityNames()) {
entities.add(SchemaElement.builder()
.domain(resp.getId())
.id(resp.getId())
.name(entity)
.bizName(entity)
.type(SchemaElementType.ENTITY)
.build());
}
domainSchema.getEntities().addAll(entities);
}
return domainSchema;
}
}


@@ -0,0 +1,116 @@
package com.tencent.supersonic.knowledge.semantic;
import com.github.pagehelper.PageInfo;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.common.util.S2ThreadContext;
import com.tencent.supersonic.common.util.ThreadContext;
import com.tencent.supersonic.semantic.api.model.request.DomainSchemaFilterReq;
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.DomainSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
import com.tencent.supersonic.semantic.model.domain.DimensionService;
import com.tencent.supersonic.semantic.model.domain.DomainService;
import com.tencent.supersonic.semantic.model.domain.MetricService;
import com.tencent.supersonic.semantic.query.service.QueryService;
import com.tencent.supersonic.semantic.query.service.SchemaService;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class LocalSemanticLayer extends BaseSemanticLayer {
private SchemaService schemaService;
private S2ThreadContext s2ThreadContext;
private DomainService domainService;
private DimensionService dimensionService;
private MetricService metricService;
@Override
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user) {
deletionDuplicated(queryStructReq);
onlyQueryFirstMetric(queryStructReq);
try {
QueryService queryService = ContextUtils.getBean(QueryService.class);
QueryResultWithSchemaResp queryResultWithSchemaResp = queryService.queryByStruct(queryStructReq, user);
return queryResultWithSchemaResp;
} catch (Exception e) {
log.info("queryByStruct has an exception:{}", e.toString());
}
return null;
}
@Override
public QueryResultWithSchemaResp queryByMultiStruct(QueryMultiStructReq queryMultiStructReq, User user) {
for (QueryStructReq queryStructReq : queryMultiStructReq.getQueryStructReqs()) {
deletionDuplicated(queryStructReq);
onlyQueryFirstMetric(queryStructReq);
}
try {
QueryService queryService = ContextUtils.getBean(QueryService.class);
return queryService.queryByMultiStruct(queryMultiStructReq, user);
} catch (Exception e) {
log.info("queryByMultiStruct has an exception:{}", e);
}
return null;
}
@Override
public QueryResultWithSchemaResp queryByDsl(QueryDslReq queryDslReq, User user) {
try {
QueryService queryService = ContextUtils.getBean(QueryService.class);
Object object = queryService.queryBySql(queryDslReq, user);
QueryResultWithSchemaResp queryResultWithSchemaResp = JsonUtil.toObject(JsonUtil.toString(object),
QueryResultWithSchemaResp.class);
return queryResultWithSchemaResp;
} catch (Exception e) {
log.info("queryByDsl has an exception:{}", e);
}
return null;
}
@Override
public List<DomainSchemaResp> doFetchDomainSchema(List<Long> ids) {
DomainSchemaFilterReq filter = new DomainSchemaFilterReq();
filter.setDomainIds(ids);
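// internal schema fetch, performed with a built-in admin identity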
User user = new User(1L, "admin", "admin", "admin@email");
schemaService = ContextUtils.getBean(SchemaService.class);
return schemaService.fetchDomainSchema(filter, user);
}
@Override
public List<DomainResp> getDomainListForViewer() {
s2ThreadContext = ContextUtils.getBean(S2ThreadContext.class);
ThreadContext threadContext = s2ThreadContext.get();
domainService = ContextUtils.getBean(DomainService.class);
return domainService.getDomainListForViewer(threadContext.getUserName());
}
@Override
public List<DomainResp> getDomainListForAdmin() {
domainService = ContextUtils.getBean(DomainService.class);
s2ThreadContext = ContextUtils.getBean(S2ThreadContext.class);
ThreadContext threadContext = s2ThreadContext.get();
return domainService.getDomainListForAdmin(threadContext.getUserName());
}
@Override
public PageInfo<DimensionResp> getDimensionPage(PageDimensionReq pageDimensionCmd) {
dimensionService = ContextUtils.getBean(DimensionService.class);
return dimensionService.queryDimension(pageDimensionCmd);
}
@Override
public PageInfo<MetricResp> getMetricPage(PageMetricReq pageMetricCmd) {
metricService = ContextUtils.getBean(MetricService.class);
return metricService.queryMetric(pageMetricCmd);
}
}

View File

@@ -0,0 +1,245 @@
package com.tencent.supersonic.knowledge.semantic;
import com.alibaba.fastjson.JSON;
import com.github.pagehelper.PageInfo;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.gson.Gson;
import com.tencent.supersonic.auth.api.authentication.config.AuthenticationConfig;
import com.tencent.supersonic.auth.api.authentication.constant.UserConstants;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.common.util.S2ThreadContext;
import com.tencent.supersonic.common.util.ThreadContext;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.semantic.api.model.request.DomainSchemaFilterReq;
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
import com.tencent.supersonic.semantic.api.model.response.*;
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
import com.tencent.supersonic.common.pojo.exception.CommonException;
import com.tencent.supersonic.common.pojo.ResultData;
import com.tencent.supersonic.common.pojo.ReturnCode;
import java.net.URI;
import java.util.*;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import org.apache.logging.log4j.util.Strings;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.util.UriComponentsBuilder;
import static com.tencent.supersonic.common.pojo.Constants.*;
import static com.tencent.supersonic.common.pojo.Constants.PAGESIZE_LOWER;
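/**
 * SemanticLayer implementation that calls a standalone semantic service over HTTP
 * via RestTemplate, forwarding the caller's auth token in the request headers.
 */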
@Slf4j
public class RemoteSemanticLayer extends BaseSemanticLayer {
@Autowired
private S2ThreadContext s2ThreadContext;
@Autowired
private AuthenticationConfig authenticationConfig;
private static final Cache<String, List<DomainSchemaResp>> domainSchemaCache =
CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.SECONDS).build();
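// domain schema responses are cached briefly (10s) to limit calls to the remote service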
private ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>> structTypeRef =
new ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>>() {
};
@Override
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user) {
deletionDuplicated(queryStructReq);
onlyQueryFirstMetric(queryStructReq);
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
return searchByRestTemplate(
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getSearchByStructPath(),
new Gson().toJson(queryStructReq));
}
@Override
public QueryResultWithSchemaResp queryByMultiStruct(QueryMultiStructReq queryMultiStructReq, User user) {
for (QueryStructReq queryStructReq : queryMultiStructReq.getQueryStructReqs()) {
deletionDuplicated(queryStructReq);
onlyQueryFirstMetric(queryStructReq);
}
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
return searchByRestTemplate(
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getSearchByMultiStructPath(),
new Gson().toJson(queryMultiStructReq));
}
@Override
public QueryResultWithSchemaResp queryByDsl(QueryDslReq queryDslReq, User user) {
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
return searchByRestTemplate(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getSearchBySqlPath(),
new Gson().toJson(queryDslReq));
}
public QueryResultWithSchemaResp searchByRestTemplate(String url, String jsonReq) {
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
fillToken(headers);
URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri();
HttpEntity<String> entity = new HttpEntity<>(jsonReq, headers);
log.info("url:{},searchByRestTemplate:{}", url, entity.getBody());
ResultData<QueryResultWithSchemaResp> responseBody;
try {
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
ResponseEntity<ResultData<QueryResultWithSchemaResp>> responseEntity = restTemplate.exchange(
requestUrl, HttpMethod.POST, entity, structTypeRef);
responseBody = responseEntity.getBody();
log.info("ApiResponse<QueryResultWithColumns> responseBody:{}", responseBody);
QueryResultWithSchemaResp semanticQuery = new QueryResultWithSchemaResp();
if (ReturnCode.SUCCESS.getCode() == responseBody.getCode()) {
QueryResultWithSchemaResp data = responseBody.getData();
semanticQuery.setColumns(data.getColumns());
semanticQuery.setResultList(data.getResultList());
semanticQuery.setSql(data.getSql());
semanticQuery.setQueryAuthorization(data.getQueryAuthorization());
return semanticQuery;
}
} catch (Exception e) {
throw new RuntimeException("search semantic interface error,url:" + url, e);
}
throw new CommonException(responseBody.getCode(), responseBody.getMsg());
}
public List<DomainSchemaResp> doFetchDomainSchema(List<Long> ids) {
HttpHeaders headers = new HttpHeaders();
headers.set(UserConstants.INTERNAL, TRUE_LOWER);
headers.setContentType(MediaType.APPLICATION_JSON);
fillToken(headers);
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
URI requestUrl = UriComponentsBuilder.fromHttpUrl(
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainSchemaPath()).build()
.encode().toUri();
DomainSchemaFilterReq filter = new DomainSchemaFilterReq();
filter.setDomainIds(ids);
ParameterizedTypeReference<ResultData<List<DomainSchemaResp>>> responseTypeRef =
new ParameterizedTypeReference<ResultData<List<DomainSchemaResp>>>() {
};
HttpEntity<String> entity = new HttpEntity<>(JSON.toJSONString(filter), headers);
try {
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
ResponseEntity<ResultData<List<DomainSchemaResp>>> responseEntity = restTemplate.exchange(
requestUrl, HttpMethod.POST, entity, responseTypeRef);
ResultData<List<DomainSchemaResp>> responseBody = responseEntity.getBody();
log.debug("ApiResponse<fetchDomainSchema> responseBody:{}", responseBody);
if (ReturnCode.SUCCESS.getCode() == responseBody.getCode()) {
List<DomainSchemaResp> data = responseBody.getData();
return data;
}
} catch (Exception e) {
throw new RuntimeException("fetchDomainSchema interface error", e);
}
throw new RuntimeException("fetchDomainSchema interface error");
}
@Override
public List<DomainResp> getDomainListForViewer() {
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
Object domainDescListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainViewListPath(), null, HttpMethod.GET);
List<DomainResp> domainDescList = JsonUtil.toList(JsonUtil.toString(domainDescListObject), DomainResp.class);
return domainDescList;
}
@Override
public List<DomainResp> getDomainListForAdmin() {
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
Object domainDescListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainListPath(), null, HttpMethod.GET);
List<DomainResp> domainDescList = JsonUtil.toList(JsonUtil.toString(domainDescListObject), DomainResp.class);
return domainDescList;
}
public Object fetchHttpResult(String url, String bodyJson, HttpMethod httpMethod) {
HttpHeaders headers = new HttpHeaders();
headers.setContentType(MediaType.APPLICATION_JSON);
fillToken(headers);
URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri();
ParameterizedTypeReference<ResultData<Object>> responseTypeRef =
new ParameterizedTypeReference<ResultData<Object>>() {
};
HttpEntity<String> entity = new HttpEntity<>(JsonUtil.toString(bodyJson), headers);
try {
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
ResponseEntity<ResultData<Object>> responseEntity = restTemplate.exchange(requestUrl,
httpMethod, entity, responseTypeRef);
ResultData<Object> responseBody = responseEntity.getBody();
log.debug("ApiResponse<fetchDomainSchema> responseBody:{}", responseBody);
if (ReturnCode.SUCCESS.getCode() == responseBody.getCode()) {
Object data = responseBody.getData();
return data;
}
} catch (Exception e) {
throw new RuntimeException("fetchDomainSchema interface error", e);
}
throw new RuntimeException("fetchDomainSchema interface error");
}
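/**
 * Propagates the auth token from the current thread context into the outgoing
 * HTTP headers, using the configured token header key.
 */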
public void fillToken(HttpHeaders headers) {
s2ThreadContext = ContextUtils.getBean(S2ThreadContext.class);
authenticationConfig = ContextUtils.getBean(AuthenticationConfig.class);
ThreadContext threadContext = s2ThreadContext.get();
if (Objects.nonNull(threadContext) && Strings.isNotEmpty(threadContext.getToken())) {
if (Objects.nonNull(authenticationConfig) && Strings.isNotEmpty(
authenticationConfig.getTokenHttpHeaderKey())) {
headers.set(authenticationConfig.getTokenHttpHeaderKey(), threadContext.getToken());
}
} else {
log.debug("threadContext is null:{}", Objects.isNull(threadContext));
}
}
@Override
public PageInfo<MetricResp> getMetricPage(PageMetricReq pageMetricCmd) {
String body = JsonUtil.toString(pageMetricCmd);
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
log.info("url:{}", defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath());
Object metricListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath(), body, HttpMethod.POST);
LinkedHashMap map = (LinkedHashMap) metricListObject;
PageInfo<Object> metricDescObjectPageInfo = generatePageInfo(map);
PageInfo<MetricResp> metricDescPageInfo = new PageInfo<>();
BeanUtils.copyProperties(metricDescObjectPageInfo, metricDescPageInfo);
// the copied list still holds raw maps, so convert it into typed MetricResp objects
metricDescPageInfo.setList(JsonUtil.toList(JsonUtil.toString(metricDescObjectPageInfo.getList()), MetricResp.class));
return metricDescPageInfo;
}
@Override
public PageInfo<DimensionResp> getDimensionPage(PageDimensionReq pageDimensionCmd) {
String body = JsonUtil.toString(pageDimensionCmd);
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
Object dimensionListObject = fetchHttpResult(defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDimensionPagePath(), body, HttpMethod.POST);
LinkedHashMap map = (LinkedHashMap) dimensionListObject;
PageInfo<Object> dimensionDescObjectPageInfo = generatePageInfo(map);
PageInfo<DimensionResp> dimensionDescPageInfo = new PageInfo<>();
BeanUtils.copyProperties(dimensionDescObjectPageInfo, dimensionDescPageInfo);
// the copied list still holds raw maps, so convert it into typed DimensionResp objects
dimensionDescPageInfo.setList(JsonUtil.toList(JsonUtil.toString(dimensionDescObjectPageInfo.getList()), DimensionResp.class));
return dimensionDescPageInfo;
}
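/**
 * Rebuilds a PageInfo from the raw map returned by the remote service
 * (list/total/pageSize entries), recomputing the page count.
 */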
private PageInfo<Object> generatePageInfo(LinkedHashMap map) {
PageInfo<Object> pageInfo = new PageInfo<>();
pageInfo.setList((List<Object>) map.get(LIST_LOWER));
Integer total = (Integer) map.get(TOTAL_LOWER);
pageInfo.setTotal(total);
Integer pageSize = (Integer) map.get(PAGESIZE_LOWER);
pageInfo.setPageSize(pageSize);
pageInfo.setPages((int) Math.ceil((double) total / pageSize));
return pageInfo;
}
}

View File

@@ -0,0 +1,15 @@
package com.tencent.supersonic.knowledge.service;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import java.util.List;
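/**
 * Maintains the word knowledge used for query parsing: full reloads of the
 * custom dictionary and incremental updates of in-memory dictionary words.
 */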
public interface KnowledgeService {
void updateSemanticKnowledge(List<DictWord> natures);
void reloadAllData(List<DictWord> natures);
void updateOnlineKnowledge(List<DictWord> natures);
}

View File

@@ -0,0 +1,55 @@
package com.tencent.supersonic.knowledge.service;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import java.util.List;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
@Service
@Slf4j
public class KnowledgeServiceImpl implements KnowledgeService {
public void updateSemanticKnowledge(List<DictWord> natures) {
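// words without the suffix nature go straight into the HanLP custom dictionary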
List<DictWord> prefixes = natures.stream()
.filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
.collect(Collectors.toList());
for (DictWord nature : prefixes) {
HanlpHelper.addToCustomDictionary(nature);
}
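// suffix words are loaded into the suffix trie maintained by SearchService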
List<DictWord> suffixes = natures.stream()
.filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
.collect(Collectors.toList());
SearchService.loadSuffix(suffixes);
}
public void reloadAllData(List<DictWord> natures) {
// 1. reload custom knowledge
try {
HanlpHelper.reloadCustomDictionary();
} catch (Exception e) {
log.error("reloadCustomDictionary error", e);
}
// 2. update online knowledge
updateOnlineKnowledge(natures);
}
public void updateOnlineKnowledge(List<DictWord> natures) {
try {
updateSemanticKnowledge(natures);
} catch (Exception e) {
log.error("updateSemanticKnowledge error", e);
}
}
}

View File

@@ -0,0 +1,47 @@
package com.tencent.supersonic.knowledge.service;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.tencent.supersonic.chat.api.component.SemanticLayer;
import com.tencent.supersonic.chat.api.pojo.DomainSchema;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.knowledge.utils.ComponentFactory;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.concurrent.TimeUnit;
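/**
 * Caches the full semantic schema fetched from the semantic layer for a few minutes,
 * so callers do not hit the underlying service on every lookup.
 */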
@Service
@Slf4j
public class SchemaService {
private static final Integer META_CACHE_TIME = 5;
public static final String ALL_CACHE = "all";
private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
private LoadingCache<String, SemanticSchema> cache = CacheBuilder.newBuilder()
.expireAfterWrite(META_CACHE_TIME, TimeUnit.MINUTES)
.build(
new CacheLoader<String, SemanticSchema>() {
@Override
public SemanticSchema load(String key) {
log.info("load getDomainSchemaInfo cache [{}]", key);
return new SemanticSchema(semanticLayer.getDomainSchema());
}
}
);
public DomainSchema getDomainSchema(Long id) {
return semanticLayer.getDomainSchema(id, true);
}
public SemanticSchema getSemanticSchema() {
return cache.getUnchecked(ALL_CACHE);
}
public LoadingCache<String, SemanticSchema> getCache() {
return cache;
}
}

View File

@@ -1,12 +1,13 @@
package com.tencent.supersonic.knowledge.infrastructure.nlp;
package com.tencent.supersonic.knowledge.service;
import com.hankcs.hanlp.collection.trie.bintrie.BaseNode;
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.tencent.supersonic.common.nlp.MapResult;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
@@ -14,17 +15,18 @@ import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
import com.tencent.supersonic.knowledge.dictionary.DictionaryAttributeUtil;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
@Service
public class Suggester {
@Slf4j
public class SearchService {
public static final int SEARCH_SIZE = 200;
private static final Logger LOGGER = LoggerFactory.getLogger(Suggester.class);
private static BinTrie<List<String>> trie;
private static BinTrie<List<String>> suffixTrie;
private static String localFileCache = "";
@@ -75,7 +77,7 @@ public class Suggester {
entry -> {
String name = entry.getKey().replace("#", " ");
List<String> natures = entry.getValue().stream()
.map(nature -> nature.replaceAll(NatureType.SUFFIX.getType(), ""))
.map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), ""))
.collect(Collectors.toList());
name = StringUtils.reverse(name);
return new MapResult(name, natures, key);
@@ -107,7 +109,7 @@ public class Suggester {
}
public static void clear() {
LOGGER.info("clear all trie");
log.info("clear all trie");
trie = new BinTrie<>();
suffixTrie = new BinTrie<>();
}
@@ -117,12 +119,12 @@ public class Suggester {
}
public static void loadSuffix(List<WordNature> suffixes) {
public static void loadSuffix(List<DictWord> suffixes) {
if (CollectionUtils.isEmpty(suffixes)) {
return;
}
TreeMap<String, CoreDictionary.Attribute> map = new TreeMap();
for (WordNature suffix : suffixes) {
for (DictWord suffix : suffixes) {
CoreDictionary.Attribute attributeNew = suffix.getNatureWithFrequency() == null
? new CoreDictionary.Attribute(Nature.nz, 1)
: CoreDictionary.Attribute.create(suffix.getNatureWithFrequency());

View File

@@ -0,0 +1,52 @@
package com.tencent.supersonic.knowledge.service;
import com.tencent.supersonic.chat.api.component.SemanticLayer;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.builder.WordBuilderFactory;
import java.util.ArrayList;
import java.util.List;
import com.tencent.supersonic.knowledge.utils.ComponentFactory;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
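/**
 * Builds the dictionary word list (dimensions, metrics, domains, entities and
 * dimension values) from the current semantic schema and keeps the previously
 * loaded snapshot for change detection.
 */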
@Service
@Slf4j
public class WordService {
private List<DictWord> preDictWords = new ArrayList<>();
public List<DictWord> getAllDictWords() {
SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
SemanticSchema semanticSchema = new SemanticSchema(semanticLayer.getDomainSchema());
List<DictWord> words = new ArrayList<>();
addWordsByType(DictWordType.DIMENSION, semanticSchema.getDimensions(), words);
addWordsByType(DictWordType.METRIC, semanticSchema.getMetrics(), words);
addWordsByType(DictWordType.DOMAIN, semanticSchema.getDomains(), words);
addWordsByType(DictWordType.ENTITY, semanticSchema.getEntities(), words);
addWordsByType(DictWordType.VALUE, semanticSchema.getDimensionValues(), words);
return words;
}
private void addWordsByType(DictWordType value, List<SchemaElement> metas, List<DictWord> natures) {
List<DictWord> natureList = WordBuilderFactory.get(value).getDictWords(metas);
log.debug("nature type:{} , nature size:{}", value.name(), natureList.size());
natures.addAll(natureList);
}
public List<DictWord> getPreDictWords() {
return preDictWords;
}
public void setPreDictWords(List<DictWord> preDictWords) {
this.preDictWords = preDictWords;
}
}

View File

@@ -0,0 +1,34 @@
package com.tencent.supersonic.knowledge.utils;
import com.tencent.supersonic.chat.api.component.SemanticLayer;
import org.springframework.core.io.support.SpringFactoriesLoader;
import java.util.List;
import java.util.Objects;
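/**
 * Resolves the SemanticLayer implementation via Spring's SpringFactoriesLoader
 * (spring.factories), instantiating it lazily on first use.
 */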
public class ComponentFactory {
private static SemanticLayer semanticLayer;
public static SemanticLayer getSemanticLayer() {
if (Objects.isNull(semanticLayer)) {
semanticLayer = init(SemanticLayer.class);
}
return semanticLayer;
}
public static void setSemanticLayer(SemanticLayer layer) {
semanticLayer = layer;
}
private static <T> List<T> init(Class<T> factoryType, List<T> list) {
list.addAll(SpringFactoriesLoader.loadFactories(factoryType,
Thread.currentThread().getContextClassLoader()));
return list;
}
private static <T> T init(Class<T> factoryType) {
return SpringFactoriesLoader.loadFactories(factoryType,
Thread.currentThread().getContextClassLoader()).get(0);
}
}

View File

@@ -0,0 +1,47 @@
package com.tencent.supersonic.knowledge.utils;
import com.google.common.base.Strings;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
import com.tencent.supersonic.knowledge.dictionary.DimValue2DictCommand;
import com.tencent.supersonic.knowledge.dictionary.DimValueInfo;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;
import java.util.List;
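/**
 * Converts between dictionary task/config domain objects and their persistence
 * data objects (DictTaskDO, DictConfDO).
 */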
public class DictTaskConverter {
private static final String dateTimeFormatter = "yyyyMMddHHmmss";
public static DictTaskDO generateDimValueDictTaskPO(DimValue2DictCommand dimValue2DictCommend, User user) {
DictTaskDO taskPO = new DictTaskDO();
Date createAt = new Date();
String date = DateTimeFormatter.ofPattern(dateTimeFormatter)
.format(createAt.toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime());
String creator = Strings.isNullOrEmpty(user.getName()) ? "" : user.getName();
String updateMode = dimValue2DictCommend.getUpdateMode().getValue();
String name = String.format("DimValue_dic_%s_%s_%s", updateMode, creator, date);
taskPO.setName(name);
taskPO.setCreatedAt(createAt);
taskPO.setCommand(JsonUtil.toString(dimValue2DictCommend));
taskPO.setStatus(TaskStatusEnum.RUNNING.getCode());
taskPO.setCreatedBy(creator);
return taskPO;
}
public static DictConfig dictConfPO2Config(DictConfDO dictConfDO) {
DictConfig dictConfig = new DictConfig();
dictConfig.setDomainId(dictConfDO.getDomainId());
List<DimValueInfo> dimValueInfos = JsonUtil.toList(dictConfDO.getDimValueInfos(), DimValueInfo.class);
dictConfig.setDimValueInfoList(dimValueInfos);
return dictConfig;
}
}

View File

@@ -1,19 +1,19 @@
package com.tencent.supersonic.knowledge.infrastructure.nlp;
package com.tencent.supersonic.knowledge.utils;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
import static com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper.FILE_SPILT;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import lombok.extern.slf4j.Slf4j;
@Slf4j
public class FileHelper {
private static final Logger LOGGER = LoggerFactory.getLogger(FileHelper.class);
public static final String FILE_SPILT = "/";
public static void deleteCacheFile(String[] path) throws IOException {
@@ -25,9 +25,9 @@ public class FileHelper {
for (File file : customSubFiles) {
try {
file.delete();
LOGGER.info("customPath:{},delete cache file:{}", customPath, file);
log.info("customPath:{},delete cache file:{}", customPath, file);
} catch (Exception e) {
LOGGER.error("delete " + file, e);
log.error("delete " + file, e);
}
}
}
@@ -70,7 +70,7 @@ public class FileHelper {
}
}
LOGGER.info("CustomDictionaryPath:{}", fileList);
log.info("CustomDictionaryPath:{}", fileList);
CustomDictionaryPath = fileList.toArray(new String[0]);
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
: CustomDictionaryPath;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge.infrastructure.nlp;
package com.tencent.supersonic.knowledge.utils;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
@@ -7,30 +7,33 @@ import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.common.nlp.MapResult;
import com.tencent.supersonic.common.nlp.NatureType;
import com.tencent.supersonic.common.nlp.WordNature;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import com.tencent.supersonic.knowledge.dictionary.HadoopFileIOAdapter;
import com.tencent.supersonic.knowledge.service.SearchService;
import com.tencent.supersonic.knowledge.dictionary.MultiCustomDictionary;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;
import org.springframework.util.ResourceUtils;
/**
* HanLP helper
*/
@Slf4j
public class HanlpHelper {
public static final String FILE_SPILT = "/";
public static final String SPACE_SPILT = "#";
public static final String DICT_MAIN_FILE_NAME = "CustomDictionary.txt";
public static final String DICT_CLASS = "classes";
private static final Logger LOGGER = LoggerFactory.getLogger(HanlpHelper.class);
public static volatile DynamicCustomDictionary CustomDictionary;
private static volatile DynamicCustomDictionary CustomDictionary;
private static volatile Segment segment;
static {
@@ -38,7 +41,7 @@ public class HanlpHelper {
try {
resetHanlpConfig();
} catch (FileNotFoundException e) {
LOGGER.error("resetHanlpConfig error", e);
log.error("resetHanlpConfig error", e);
}
}
@@ -76,7 +79,7 @@ public class HanlpHelper {
*/
public static boolean reloadCustomDictionary() throws IOException {
LOGGER.info("reloadCustomDictionary start");
log.info("reloadCustomDictionary start");
final long startTime = System.currentTimeMillis();
@@ -93,10 +96,10 @@ public class HanlpHelper {
FileHelper.resetCustomPath(getDynamicCustomDictionary());
}
// 3.clear trie
Suggester.clear();
SearchService.clear();
boolean reload = getDynamicCustomDictionary().reload();
LOGGER.info("reloadCustomDictionary end ,cost:{},reload:{}", System.currentTimeMillis() - startTime, reload);
log.info("reloadCustomDictionary end ,cost:{},reload:{}", System.currentTimeMillis() - startTime, reload);
return reload;
}
@@ -108,7 +111,7 @@ public class HanlpHelper {
CustomDictionaryPath = Arrays.stream(CustomDictionaryPath).map(path -> hanlpPropertiesPath + FILE_SPILT + path)
.toArray(String[]::new);
LOGGER.info("hanlpPropertiesPath:{},CustomDictionaryPath:{}", hanlpPropertiesPath, CustomDictionaryPath);
log.info("hanlpPropertiesPath:{},CustomDictionaryPath:{}", hanlpPropertiesPath, CustomDictionaryPath);
HanLP.Config.CoreDictionaryPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.BiGramDictionaryPath;
HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath = hanlpPropertiesPath + FILE_SPILT
@@ -155,9 +158,9 @@ public class HanlpHelper {
return ResourceUtils.getFile("classpath:hanlp.properties").getParent();
}
public static boolean addToCustomDictionary(WordNature wordNature) {
LOGGER.info("wordNature:{}", wordNature);
return getDynamicCustomDictionary().insert(wordNature.getWord(), wordNature.getNatureWithFrequency());
public static boolean addToCustomDictionary(DictWord dictWord) {
log.info("dictWord:{}", dictWord);
return getDynamicCustomDictionary().insert(dictWord.getWord(), dictWord.getNatureWithFrequency());
}
public static void transLetterOriginal(List<MapResult> mapResults) {
@@ -178,7 +181,7 @@ public class HanlpHelper {
public static List<Term> getTerms(String text) {
return getSegment().seg(text.toLowerCase()).stream()
.filter(term -> term.getNature().startsWith(NatureType.NATURE_SPILT))
.filter(term -> term.getNature().startsWith(DictWordType.NATURE_SPILT))
.collect(Collectors.toList());
}

View File

@@ -1,7 +1,6 @@
package com.tencent.supersonic.knowledge.infrastructure.nlp;
package com.tencent.supersonic.knowledge.utils;
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
import static com.tencent.supersonic.knowledge.infrastructure.nlp.HanlpHelper.FILE_SPILT;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.utility.Predefine;
@@ -9,20 +8,19 @@ import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Hdfs File Helper
*/
@Slf4j
public class HdfsFileHelper {
private static final Logger LOGGER = LoggerFactory.getLogger(HdfsFileHelper.class);
/***
* delete cache file
* @param path
@@ -31,24 +29,24 @@ public class HdfsFileHelper {
public static void deleteCacheFile(String[] path) throws IOException {
FileSystem fs = FileSystem.get(URI.create(path[0]), new Configuration());
String cacheFilePath = path[0] + Predefine.BIN_EXT;
LOGGER.info("delete cache file:{}", cacheFilePath);
log.info("delete cache file:{}", cacheFilePath);
try {
fs.delete(new Path(cacheFilePath), false);
} catch (Exception e) {
LOGGER.error("delete:" + cacheFilePath, e);
log.error("delete:" + cacheFilePath, e);
}
int customBase = cacheFilePath.lastIndexOf(FILE_SPILT);
String customPath = cacheFilePath.substring(0, customBase) + FILE_SPILT + "*.bin";
int customBase = cacheFilePath.lastIndexOf(FileHelper.FILE_SPILT);
String customPath = cacheFilePath.substring(0, customBase) + FileHelper.FILE_SPILT + "*.bin";
List<String> fileList = getFileList(fs, new Path(customPath));
for (String file : fileList) {
try {
fs.delete(new Path(file), false);
LOGGER.info("delete cache file:{}", file);
log.info("delete cache file:{}", file);
} catch (Exception e) {
LOGGER.error("delete " + file, e);
log.error("delete " + file, e);
}
}
LOGGER.info("fileList:{}", fileList);
log.info("fileList:{}", fileList);
}
/**
@@ -61,11 +59,11 @@ public class HdfsFileHelper {
String[] path = CustomDictionaryPath;
FileSystem fs = FileSystem.get(URI.create(path[0]), new Configuration());
String cacheFilePath = path[0] + Predefine.BIN_EXT;
int customBase = cacheFilePath.lastIndexOf(FILE_SPILT);
String customPath = cacheFilePath.substring(0, customBase) + FILE_SPILT + "*.txt";
LOGGER.info("customPath:{}", customPath);
int customBase = cacheFilePath.lastIndexOf(FileHelper.FILE_SPILT);
String customPath = cacheFilePath.substring(0, customBase) + FileHelper.FILE_SPILT + "*.txt";
log.info("customPath:{}", customPath);
List<String> fileList = getFileList(fs, new Path(customPath));
LOGGER.info("CustomDictionaryPath:{}", fileList);
log.info("CustomDictionaryPath:{}", fileList);
CustomDictionaryPath = fileList.toArray(new String[0]);
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
: CustomDictionaryPath;

View File

@@ -1,2 +1,2 @@
com.tencent.supersonic.knowledge.domain.FileHandler=\
com.tencent.supersonic.knowledge.domain.LocalFileHandler
com.tencent.supersonic.knowledge.dictionary.FileHandler=\
com.tencent.supersonic.knowledge.dictionary.LocalFileHandler

View File

@@ -2,10 +2,10 @@
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tencent.supersonic.knowledge.infrastructure.custom.DictConfMapper">
<mapper namespace="com.tencent.supersonic.knowledge.persistence.mapper.DictConfMapper">
<resultMap id="DictConfPO"
type="com.tencent.supersonic.knowledge.domain.dataobject.DictConfPO">
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO">
<id column="id" property="id"/>
<result column="domain_id" property="domainId"/>
<result column="dim_value_infos" property="dimValueInfos"/>

View File

@@ -2,10 +2,10 @@
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.tencent.supersonic.knowledge.infrastructure.custom.DictTaskMapper">
<mapper namespace="com.tencent.supersonic.knowledge.persistence.mapper.DictTaskMapper">
<resultMap id="DimValueDictTaskPO"
type="com.tencent.supersonic.knowledge.domain.dataobject.DimValueDictTaskPO">
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO">
<id column="id" property="id"/>
<result column="name" property="name"/>
<result column="description" property="description"/>