[release](project)update version 0.7.4 backend (#66)

This commit is contained in:
daikon
2023-09-10 21:26:46 +08:00
committed by GitHub
parent 02068f58c7
commit a8add4c013
172 changed files with 2180 additions and 1082 deletions

View File

@@ -11,10 +11,6 @@
<artifactId>chat-knowledge</artifactId>
<dependencies>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
@@ -111,8 +107,9 @@
<groupId>com.tencent.supersonic</groupId>
<artifactId>semantic-query</artifactId>
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
</dependencies>
</project>
</project>

View File

@@ -1,55 +0,0 @@
package com.tencent.supersonic.knowledge.dictionary;
import org.apache.commons.lang3.StringUtils;
/**
 * Nature types used to tag dictionary words, such as metric, dimension, etc.
 *
 * <p>Each constant carries a short type string; {@link #getType()} exposes it
 * prefixed with {@link #NATURE_SPILT}.
 */
public enum DictWordType {

    METRIC("metric"),

    DIMENSION("dimension"),

    VALUE("value"),

    DOMAIN("dm"),

    MODEL("model"),

    ENTITY("entity"),

    NUMBER("m"),

    SUFFIX("suffix");

    /** Separator placed between the segments of a nature string. */
    public static final String NATURE_SPILT = "_";

    /** A single blank character. */
    public static final String SPACE = " ";

    private final String type;

    DictWordType(String type) {
        this.type = type;
    }

    /**
     * @return the type string of this constant, prefixed with {@link #NATURE_SPILT}
     */
    public String getType() {
        return NATURE_SPILT + type;
    }

    /**
     * Resolves the nature type encoded in a nature string.
     *
     * <p>Resolution order:
     * <ol>
     *   <li>the first constant (in declaration order) whose {@link #getType()} is a suffix
     *       of {@code nature};</li>
     *   <li>{@link #DOMAIN} when splitting on {@link #NATURE_SPILT} yields two segments and
     *       the second one is numeric (e.g. {@code "_1"});</li>
     *   <li>{@link #VALUE} when the split yields three segments and the second and third
     *       are numeric (e.g. {@code "_1_2"}).</li>
     * </ol>
     *
     * @param nature the nature string to inspect
     * @return the matching type, or {@code null} when {@code nature} is empty, does not start
     *         with {@link #NATURE_SPILT}, or matches none of the rules above
     */
    public static DictWordType getNatureType(String nature) {
        boolean wellFormed = !StringUtils.isEmpty(nature) && nature.startsWith(NATURE_SPILT);
        if (!wellFormed) {
            return null;
        }

        // An explicit type suffix wins over the purely numeric forms handled below.
        for (DictWordType candidate : values()) {
            String suffix = candidate.getType();
            if (nature.endsWith(suffix)) {
                return candidate;
            }
        }

        // The leading separator makes segments[0] empty, so the ids start at index 1.
        String[] segments = nature.split(DictWordType.NATURE_SPILT);
        switch (segments.length) {
            case 2:
                // domain
                return StringUtils.isNumeric(segments[1]) ? DOMAIN : null;
            case 3:
                // dimension value
                boolean bothNumeric = StringUtils.isNumeric(segments[1])
                        && StringUtils.isNumeric(segments[2]);
                return bothNumeric ? VALUE : null;
            default:
                return null;
        }
    }
}

View File

@@ -375,4 +375,4 @@ public class MultiCustomDictionary extends DynamicCustomDictionary {
return true;
}
}
}
}

View File

@@ -5,7 +5,7 @@ import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import lombok.extern.slf4j.Slf4j;
/**

View File

@@ -7,7 +7,7 @@ import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

View File

@@ -6,7 +6,7 @@ import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

View File

@@ -7,7 +7,7 @@ import java.util.Objects;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

View File

@@ -7,7 +7,7 @@ import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

View File

@@ -7,7 +7,7 @@ import java.util.Objects;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

View File

@@ -1,7 +1,7 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic.knowledge;
package com.tencent.supersonic.knowledge.listener;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.service.SchemaService;
@@ -16,7 +16,7 @@ import java.util.List;
@Slf4j
@Component
public class ApplicationStartedInit implements ApplicationListener<ApplicationStartedEvent> {
public class ApplicationStartedListener implements ApplicationListener<ApplicationStartedEvent> {
@Autowired
private KnowledgeService knowledgeService;
@@ -27,6 +27,11 @@ public class ApplicationStartedInit implements ApplicationListener<ApplicationSt
/**
 * Handles the application-started event by delegating to
 * {@code updateKnowledgeDimValue()}; the Boolean result of that call is not used here.
 *
 * @param event the application-started event (not inspected by this method)
 */
@Override
public void onApplicationEvent(ApplicationStartedEvent event) {
updateKnowledgeDimValue();
}
public Boolean updateKnowledgeDimValue() {
Boolean isOk = false;
try {
log.debug("ApplicationStartedInit start");
@@ -35,9 +40,11 @@ public class ApplicationStartedInit implements ApplicationListener<ApplicationSt
knowledgeService.reloadAllData(dictWords);
log.debug("ApplicationStartedInit end");
isOk = true;
} catch (Exception e) {
log.error("ApplicationStartedInit error", e);
}
return isOk;
}
/***
@@ -66,4 +73,4 @@ public class ApplicationStartedInit implements ApplicationListener<ApplicationSt
log.debug("reloadKnowledge end");
}
}
}

View File

@@ -0,0 +1,27 @@
package com.tencent.supersonic.knowledge.listener;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.DataAddEvent;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.ApplicationListener;
import org.springframework.stereotype.Component;
/**
 * Listens for {@link DataAddEvent} and adds the word described by the event
 * to the custom dictionary via {@link HanlpHelper#addToCustomDictionary}.
 */
@Component
@Slf4j
public class DataAddListener implements ApplicationListener<DataAddEvent> {

    /**
     * Converts the event into a {@link DictWord} and adds it to the custom dictionary.
     *
     * @param event the data-add event carrying name, model id, id and type
     */
    @Override
    public void onApplicationEvent(DataAddEvent event) {
        DictWord added = toDictWord(event);
        log.info("dataAddListener begins to add data:{}", added);
        HanlpHelper.addToCustomDictionary(added);
    }

    /**
     * Builds the dictionary word for an event.
     *
     * <p>The nature is the separator, the model id, the separator, the id and the type
     * concatenated in that order; the nature-with-frequency is the nature followed by a
     * blank and {@link Constants#DEFAULT_FREQUENCY}.
     */
    private static DictWord toDictWord(DataAddEvent event) {
        DictWord word = new DictWord();
        word.setWord(event.getName());

        String separator = DictWordType.NATURE_SPILT;
        String idPart = separator + event.getModelId() + separator + event.getId();
        String wordNature = idPart + event.getType();

        word.setNature(wordNature);
        word.setNatureWithFrequency(wordNature + " " + Constants.DEFAULT_FREQUENCY);
        return word;
    }
}

View File

@@ -0,0 +1,27 @@
package com.tencent.supersonic.knowledge.listener;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.DataDeleteEvent;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.ApplicationListener;
import org.springframework.stereotype.Component;
/**
 * Listens for {@link DataDeleteEvent} and removes the word described by the event
 * from the custom dictionary via {@link HanlpHelper#removeFromCustomDictionary}.
 */
@Component
@Slf4j
public class DataDeleteListener implements ApplicationListener<DataDeleteEvent> {

    /**
     * Converts the event into a {@link DictWord} and removes it from the custom dictionary.
     *
     * @param event the data-delete event carrying name, model id, id and type
     */
    @Override
    public void onApplicationEvent(DataDeleteEvent event) {
        DictWord removed = toDictWord(event);
        log.info("dataDeleteListener begins to delete data:{}", removed);
        HanlpHelper.removeFromCustomDictionary(removed);
    }

    /**
     * Builds the dictionary word for an event.
     *
     * <p>The nature is the separator, the model id, the separator, the id and the type
     * concatenated in that order; the nature-with-frequency is the nature followed by a
     * blank and {@link Constants#DEFAULT_FREQUENCY}.
     */
    private static DictWord toDictWord(DataDeleteEvent event) {
        DictWord word = new DictWord();
        word.setWord(event.getName());

        String separator = DictWordType.NATURE_SPILT;
        String idPart = separator + event.getModelId() + separator + event.getId();
        String wordNature = idPart + event.getType();

        word.setNature(wordNature);
        word.setNatureWithFrequency(wordNature + " " + Constants.DEFAULT_FREQUENCY);
        return word;
    }
}

View File

@@ -0,0 +1,29 @@
package com.tencent.supersonic.knowledge.listener;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.DataUpdateEvent;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.ApplicationListener;
import org.springframework.stereotype.Component;
/**
 * Listens for {@link DataUpdateEvent} and renames a word in the custom dictionary:
 * the entry under the event's current name is removed and an entry with the same
 * nature is added under the event's new name.
 */
@Component
@Slf4j
public class DataUpdateListener implements ApplicationListener<DataUpdateEvent> {

    /**
     * Removes the word registered under {@code event.getName()} and re-adds it under
     * {@code event.getNewName()}, reusing the same {@link DictWord} instance.
     *
     * @param event the data-update event carrying old name, new name, model id, id and type
     */
    @Override
    public void onApplicationEvent(DataUpdateEvent event) {
        DictWord word = toDictWord(event);
        log.info("dataUpdateListener begins to update data:{}", word);

        // Drop the entry under the old name first ...
        HanlpHelper.removeFromCustomDictionary(word);

        // ... then register the same nature under the new name.
        word.setWord(event.getNewName());
        HanlpHelper.addToCustomDictionary(word);
    }

    /**
     * Builds the dictionary word for the event's current (old) name.
     *
     * <p>The nature is the separator, the model id, the separator, the id and the type
     * concatenated in that order; the nature-with-frequency is the nature followed by a
     * blank and {@link Constants#DEFAULT_FREQUENCY}.
     */
    private static DictWord toDictWord(DataUpdateEvent event) {
        DictWord word = new DictWord();
        word.setWord(event.getName());

        String separator = DictWordType.NATURE_SPILT;
        String idPart = separator + event.getModelId() + separator + event.getId();
        String wordNature = idPart + event.getType();

        word.setNature(wordNature);
        word.setNatureWithFrequency(wordNature + " " + Constants.DEFAULT_FREQUENCY);
        return word;
    }
}

View File

@@ -1,7 +1,7 @@
package com.tencent.supersonic.knowledge.service;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import java.util.List;
@@ -52,4 +52,4 @@ public class KnowledgeServiceImpl implements KnowledgeService {
}
}
}
}

View File

@@ -5,7 +5,7 @@ import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.DictionaryAttributeUtil;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import java.util.Arrays;

View File

@@ -4,7 +4,7 @@ import com.tencent.supersonic.chat.api.component.SemanticLayer;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.builder.WordBuilderFactory;
import java.util.ArrayList;

View File

@@ -7,7 +7,7 @@ import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -20,6 +20,7 @@ import com.tencent.supersonic.knowledge.dictionary.HadoopFileIOAdapter;
import com.tencent.supersonic.knowledge.service.SearchService;
import com.tencent.supersonic.knowledge.dictionary.MultiCustomDictionary;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
import org.springframework.util.ResourceUtils;
@@ -163,6 +164,29 @@ public class HanlpHelper {
return getDynamicCustomDictionary().insert(dictWord.getWord(), dictWord.getNatureWithFrequency());
}
/**
 * Removes a single nature of a word from the dynamic custom dictionary.
 *
 * <p>The word's entry is removed and then re-added with every nature/frequency pair it
 * had except the pair whose nature equals {@code dictWord.getNature()}. When no pair is
 * left, the word stays removed. When the word is not in the dictionary, nothing happens.
 *
 * @param dictWord the word to look up and the nature to strip from it
 */
public static void removeFromCustomDictionary(DictWord dictWord) {
    log.info("dictWord:{}", dictWord);
    CoreDictionary.Attribute attribute = getDynamicCustomDictionary().get(dictWord.getWord());
    // The guard used to be inverted (`!= null`): it returned for every existing word and
    // fell through to dereference a null attribute for every missing one.
    if (attribute == null) {
        return;
    }
    log.info("get attribute:{}", attribute);
    getDynamicCustomDictionary().remove(dictWord.getWord());

    // Rebuild "nature frequency nature frequency ..." from the pairs that must survive.
    StringBuilder remaining = new StringBuilder();
    for (int i = 0; i < attribute.nature.length; i++) {
        String nature = attribute.nature[i].toString();
        if (nature.equals(dictWord.getNature())) {
            continue;
        }
        if (remaining.length() > 0) {
            remaining.append(' ');
        }
        remaining.append(nature).append(' ').append(attribute.frequency[i]);
    }

    String natureWithFrequency = remaining.toString();
    log.info("filtered natureWithFrequency:{}", natureWithFrequency);
    if (StringUtils.isNotBlank(natureWithFrequency)) {
        getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency);
    }
}
public static void transLetterOriginal(List<MapResult> mapResults) {
if (CollectionUtils.isEmpty(mapResults)) {
return;

View File

@@ -3,7 +3,7 @@ package com.tencent.supersonic.knowledge.utils;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.common.pojo.enums.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.ModelInfoStat;
import java.util.ArrayList;
import java.util.Comparator;