[release][project] supersonic 0.7.3 version backend update (#40)

* [improvement] add some features

* [improvement] revise CHANGELOG

---------

Co-authored-by: zuopengge <hwzuopengge@tencent.com>
This commit is contained in:
mainmain
2023-08-29 20:06:34 +08:00
committed by GitHub
parent 6fe9ab79ed
commit e1911bc81b
260 changed files with 6466 additions and 7108 deletions

View File

@@ -1,6 +1,8 @@
package com.hankcs.hanlp.collection.trie.bintrie;
import com.hankcs.hanlp.corpus.io.ByteArray;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.knowledge.service.LoadRemoveService;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.ObjectInput;
@@ -8,9 +10,12 @@ import java.io.ObjectOutput;
import java.util.AbstractMap;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public abstract class BaseNode<V> implements Comparable<BaseNode> {
@@ -19,6 +24,8 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
* Array of all status values, kept for convenient lookup when reading
*/
static final Status[] ARRAY_STATUS = Status.values();
private static final Logger logger = LoggerFactory.getLogger(BaseNode.class);
/**
* Child nodes
*/
@@ -279,10 +286,14 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
+ '}';
}
public void walkNode(Set<Map.Entry<String, V>> entrySet) {
public void walkNode(Set<Map.Entry<String, V>> entrySet, Integer agentId, Set<Long> detectModelIds) {
if (status == Status.WORD_MIDDLE_2 || status == Status.WORD_END_3) {
LoadRemoveService loadRemoveService = ContextUtils.getBean(LoadRemoveService.class);
logger.debug("agentId:{},detectModelIds:{},before:{}", agentId, detectModelIds, value.toString());
List natures = loadRemoveService.removeNatures((List) value, agentId, detectModelIds);
String name = this.prefix != null ? this.prefix + c : "" + c;
entrySet.add(new TrieEntry(name, value));
logger.debug("name:{},after:{},natures:{}", name, (List) value, natures);
entrySet.add(new TrieEntry(name, (V) natures));
}
}
@@ -292,7 +303,8 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
* @param entrySet
* @param limit
*/
public void walkLimit(StringBuilder sb, Set<Map.Entry<String, V>> entrySet, int limit) {
public void walkLimit(StringBuilder sb, Set<Map.Entry<String, V>> entrySet, int limit, Integer agentId,
Set<Long> detectModelIds) {
Queue<BaseNode> queue = new ArrayDeque<>();
this.prefix = sb.toString();
queue.add(this);
@@ -304,7 +316,7 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
if (root == null) {
continue;
}
root.walkNode(entrySet);
root.walkNode(entrySet, agentId, detectModelIds);
if (root.child == null) {
continue;
}

View File

@@ -0,0 +1,21 @@
package com.tencent.supersonic.knowledge.dictionary;
import java.io.Serializable;
import lombok.Builder;
import lombok.Data;
import lombok.ToString;
/**
 * Serializable holder of four per-category term counters.
 * Accessors, {@code toString} and the builder are generated by Lombok.
 * NatureHelper.getModelStat(List) builds an instance of this class from a term list.
 */
@Data
@ToString
@Builder
public class ModelInfoStat implements Serializable {
/** Count filled from NatureHelper's model-or-entity term counter. */
private long modelCount;
/** Count filled from NatureHelper's metric term counter. */
private long metricModelCount;
/** Count filled from NatureHelper's dimension term counter. */
private long dimensionModelCount;
/** Count filled from NatureHelper's dimension-value term counter. */
private long dimensionValueModelCount;
}

View File

@@ -23,4 +23,4 @@ public class WordBuilderFactory {
public static BaseWordBuilder get(DictWordType strategyType) {
return wordNatures.get(strategyType);
}
}
}

View File

@@ -8,7 +8,12 @@ import com.tencent.supersonic.common.pojo.enums.AuthType;
import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq;
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
import com.tencent.supersonic.semantic.api.model.response.*;
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
@@ -31,7 +36,7 @@ public class LocalSemanticLayer extends BaseSemanticLayer {
@SneakyThrows
@Override
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user){
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user) {
QueryService queryService = ContextUtils.getBean(QueryService.class);
return queryService.queryByStructWithAuth(queryStructReq, user);
}

View File

@@ -12,7 +12,13 @@ import org.apache.logging.log4j.util.Strings;
import org.springframework.beans.BeanUtils;
import org.springframework.util.CollectionUtils;
import java.util.*;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Objects;
import java.util.Map;
import java.util.stream.Collectors;
public class ModelSchemaBuilder {

View File

@@ -1,43 +1,37 @@
package com.tencent.supersonic.knowledge.semantic;
import static com.tencent.supersonic.common.pojo.Constants.LIST_LOWER;
import static com.tencent.supersonic.common.pojo.Constants.PAGESIZE_LOWER;
import static com.tencent.supersonic.common.pojo.Constants.TOTAL_LOWER;
import static com.tencent.supersonic.common.pojo.Constants.TRUE_LOWER;
import com.alibaba.fastjson.JSON;
import com.github.pagehelper.PageInfo;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.gson.Gson;
import com.tencent.supersonic.auth.api.authentication.config.AuthenticationConfig;
import com.tencent.supersonic.auth.api.authentication.constant.UserConstants;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.ResultData;
import com.tencent.supersonic.common.pojo.ReturnCode;
import com.tencent.supersonic.common.pojo.enums.AuthType;
import com.tencent.supersonic.common.pojo.exception.CommonException;
import com.tencent.supersonic.common.util.ContextUtils;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.common.util.S2ThreadContext;
import com.tencent.supersonic.common.util.ThreadContext;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.common.pojo.enums.AuthType;
import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq;
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
import com.tencent.supersonic.common.pojo.exception.CommonException;
import com.tencent.supersonic.common.pojo.ResultData;
import com.tencent.supersonic.common.pojo.ReturnCode;
import java.net.URI;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.LinkedHashMap;
import lombok.extern.slf4j.Slf4j;
import org.apache.logging.log4j.util.Strings;
import org.springframework.beans.BeanUtils;
@@ -50,6 +44,11 @@ import org.springframework.http.ResponseEntity;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.util.UriComponentsBuilder;
import static com.tencent.supersonic.common.pojo.Constants.TRUE_LOWER;
import static com.tencent.supersonic.common.pojo.Constants.LIST_LOWER;
import static com.tencent.supersonic.common.pojo.Constants.TOTAL_LOWER;
import static com.tencent.supersonic.common.pojo.Constants.PAGESIZE_LOWER;
@Slf4j
public class RemoteSemanticLayer extends BaseSemanticLayer {
@@ -57,8 +56,6 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
private AuthenticationConfig authenticationConfig;
private static final Cache<String, List<ModelSchemaResp>> domainSchemaCache =
CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.SECONDS).build();
private ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>> structTypeRef =
new ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>>() {
};
@@ -125,7 +122,7 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
URI requestUrl = UriComponentsBuilder.fromHttpUrl(
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelSchemaPath()).build()
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelSchemaPath()).build()
.encode().toUri();
ModelSchemaFilterReq filter = new ModelSchemaFilterReq();
filter.setModelIds(ids);
@@ -155,8 +152,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
public List<DomainResp> getDomainList(User user) {
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
Object domainDescListObject = fetchHttpResult(
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainListPath(), null,
HttpMethod.GET);
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainListPath(),
null, HttpMethod.GET);
return JsonUtil.toList(JsonUtil.toString(domainDescListObject), DomainResp.class);
}
@@ -167,8 +164,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
}
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
String url = String.format("%s?domainId=%s&authType=%s",
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelListPath()
, domainId, authType.toString());
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelListPath(),
domainId, authType.toString());
Object domainDescListObject = fetchHttpResult(url, null, HttpMethod.GET);
return JsonUtil.toList(JsonUtil.toString(domainDescListObject), ModelResp.class);
}
@@ -218,8 +215,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
log.info("url:{}", defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath());
Object dimensionListObject = fetchHttpResult(
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath(), body,
HttpMethod.POST);
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath(),
body, HttpMethod.POST);
LinkedHashMap map = (LinkedHashMap) dimensionListObject;
PageInfo<Object> metricDescObjectPageInfo = generatePageInfo(map);
PageInfo<MetricResp> metricDescPageInfo = new PageInfo<>();
@@ -233,8 +230,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
String body = JsonUtil.toString(pageDimensionCmd);
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
Object dimensionListObject = fetchHttpResult(
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDimensionPagePath(), body,
HttpMethod.POST);
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDimensionPagePath(),
body, HttpMethod.POST);
LinkedHashMap map = (LinkedHashMap) dimensionListObject;
PageInfo<Object> dimensionDescObjectPageInfo = generatePageInfo(map);
PageInfo<DimensionResp> dimensionDescPageInfo = new PageInfo<>();

View File

@@ -0,0 +1,55 @@
package com.tencent.supersonic.knowledge.service;
import com.tencent.supersonic.knowledge.utils.NatureHelper;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
/**
 * Filters the nature list attached to a dictionary word.
 * Two independent rules are applied: a model restriction driven by the caller,
 * and an optional prefix rule driven by configuration.
 */
@Data
@Service
public class LoadRemoveService {

    /** Agent id for which the prefix rule is active; unset when the property is empty. */
    @Value("${mapper.remove.agentId:}")
    private Integer mapperRemoveAgentId;

    /** Natures starting with this prefix are dropped for the configured agent. */
    @Value("${mapper.remove.nature.prefix:}")
    private String mapperRemoveNaturePrefix;

    /**
     * Returns a filtered copy of the given natures.
     *
     * @param value natures to filter; a null or empty list is returned as is (same instance)
     * @param agentId agent issuing the request, compared against the configured agent id
     * @param detectModelIds when non-empty, natures whose model id is not in this set are removed
     * @return a new list with the surviving natures, or {@code value} itself when it is null/empty
     */
    public List removeNatures(List value, Integer agentId, Set<Long> detectModelIds) {
        if (CollectionUtils.isEmpty(value)) {
            return value;
        }
        // work on a copy so the list stored in the trie is never modified
        List<String> remaining = new ArrayList<>(value);

        boolean restrictToModels = !CollectionUtils.isEmpty(detectModelIds);
        if (restrictToModels) {
            remaining.removeIf(nature -> isOutsideModels(nature, detectModelIds));
        }

        boolean prefixRuleActive = Objects.nonNull(mapperRemoveAgentId)
                && mapperRemoveAgentId.equals(agentId)
                && StringUtils.isNotBlank(mapperRemoveNaturePrefix);
        if (prefixRuleActive) {
            remaining.removeIf(this::hasRemovablePrefix);
        }
        return remaining;
    }

    /** True when the nature carries a model id that is not one of the detected models. */
    private static boolean isOutsideModels(String nature, Set<Long> detectModelIds) {
        if (Objects.isNull(nature)) {
            return false;
        }
        Long modelId = NatureHelper.getModelId(nature);
        // natures without a parsable model id are always kept
        return Objects.nonNull(modelId) && !detectModelIds.contains(modelId);
    }

    /** True when the nature is non-null and starts with the configured prefix. */
    private boolean hasRemovablePrefix(String nature) {
        return !Objects.isNull(nature) && nature.startsWith(mapperRemoveNaturePrefix);
    }
}

View File

@@ -15,9 +15,9 @@ import org.springframework.stereotype.Service;
@Slf4j
public class SchemaService {
private static final Integer META_CACHE_TIME = 5;
public static final String ALL_CACHE = "all";
public static final String ALL_CACHE = "all";
private static final Integer META_CACHE_TIME = 5;
private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
private LoadingCache<String, SemanticSchema> cache = CacheBuilder.newBuilder()

View File

@@ -27,7 +27,6 @@ public class SearchService {
public static final int SEARCH_SIZE = 200;
private static BinTrie<List<String>> trie;
private static BinTrie<List<String>> suffixTrie;
private static String localFileCache = "";
static {
trie = new BinTrie<>();
@@ -39,16 +38,13 @@ public class SearchService {
* @param key
* @return
*/
public static List<MapResult> prefixSearch(String key) {
return prefixSearch(key, SEARCH_SIZE, trie);
public static List<MapResult> prefixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
return prefixSearch(key, limit, agentId, trie, detectModelIds);
}
public static List<MapResult> prefixSearch(String key, int limit) {
return prefixSearch(key, limit, trie);
}
public static List<MapResult> prefixSearch(String key, int limit, BinTrie<List<String>> binTrie) {
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie);
public static List<MapResult> prefixSearch(String key, int limit, Integer agentId, BinTrie<List<String>> binTrie,
Set<Long> detectModelIds) {
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, agentId, detectModelIds);
return result.stream().map(
entry -> {
String name = entry.getKey().replace("#", " ");
@@ -64,13 +60,14 @@ public class SearchService {
* @param key
* @return
*/
public static List<MapResult> suffixSearch(String key, int limit) {
public static List<MapResult> suffixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
String reverseDetectSegment = StringUtils.reverse(key);
return suffixSearch(reverseDetectSegment, limit, suffixTrie);
return suffixSearch(reverseDetectSegment, limit, agentId, suffixTrie, detectModelIds);
}
public static List<MapResult> suffixSearch(String key, int limit, BinTrie<List<String>> binTrie) {
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie);
public static List<MapResult> suffixSearch(String key, int limit, Integer agentId, BinTrie<List<String>> binTrie,
Set<Long> detectModelIds) {
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, agentId, detectModelIds);
return result.stream().map(
entry -> {
String name = entry.getKey().replace("#", " ");
@@ -86,7 +83,7 @@ public class SearchService {
}
private static Set<Map.Entry<String, List<String>>> prefixSearchLimit(String key, int limit,
BinTrie<List<String>> binTrie) {
BinTrie<List<String>> binTrie, Integer agentId, Set<Long> detectModelIds) {
key = key.toLowerCase();
Set<Map.Entry<String, List<String>>> entrySet = new TreeSet<Map.Entry<String, List<String>>>();
StringBuilder sb = new StringBuilder(key.substring(0, key.length() - 1));
@@ -102,7 +99,7 @@ public class SearchService {
if (branch == null) {
return entrySet;
}
branch.walkLimit(sb, entrySet, limit);
branch.walkLimit(sb, entrySet, limit, agentId, detectModelIds);
return entrySet;
}

View File

@@ -0,0 +1,165 @@
package com.tencent.supersonic.knowledge.utils;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.ModelInfoStat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
/**
* nature parse helper
*/
@Slf4j
public class NatureHelper {
public static SchemaElementType convertToElementType(String nature) {
DictWordType dictWordType = DictWordType.getNatureType(nature);
SchemaElementType result = null;
switch (dictWordType) {
case METRIC:
result = SchemaElementType.METRIC;
break;
case DIMENSION:
result = SchemaElementType.DIMENSION;
break;
case ENTITY:
result = SchemaElementType.ENTITY;
break;
case MODEL:
result = SchemaElementType.MODEL;
break;
case VALUE:
result = SchemaElementType.VALUE;
break;
default:
break;
}
return result;
}
private static boolean isModelOrEntity(Term term, Integer model) {
return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString()
.endsWith(DictWordType.ENTITY.getType());
}
public static Integer getModelByNature(Nature nature) {
if (nature.startsWith(DictWordType.NATURE_SPILT)) {
String[] dimensionValues = nature.toString().split(DictWordType.NATURE_SPILT);
if (StringUtils.isNumeric(dimensionValues[1])) {
return Integer.valueOf(dimensionValues[1]);
}
}
return 0;
}
public static Long getModelId(String nature) {
try {
String[] split = nature.split(DictWordType.NATURE_SPILT);
if (split.length <= 1) {
return null;
}
return Long.valueOf(split[1]);
} catch (NumberFormatException e) {
log.error("", e);
}
return null;
}
public static boolean isDimensionValueModelId(String nature) {
if (StringUtils.isEmpty(nature)) {
return false;
}
if (!nature.startsWith(DictWordType.NATURE_SPILT)) {
return false;
}
String[] split = nature.split(DictWordType.NATURE_SPILT);
if (split.length <= 1) {
return false;
}
return !nature.endsWith(DictWordType.METRIC.getType()) && !nature.endsWith(DictWordType.DIMENSION.getType())
&& StringUtils.isNumeric(split[1]);
}
public static ModelInfoStat getModelStat(List<Term> terms) {
return ModelInfoStat.builder()
.modelCount(getModelCount(terms))
.dimensionModelCount(getDimensionCount(terms))
.metricModelCount(getMetricCount(terms))
.dimensionValueModelCount(getDimensionValueCount(terms))
.build();
}
private static long getModelCount(List<Term> terms) {
return terms.stream().filter(term -> isModelOrEntity(term, getModelByNature(term.nature))).count();
}
private static long getDimensionValueCount(List<Term> terms) {
return terms.stream().filter(term -> isDimensionValueModelId(term.nature.toString())).count();
}
private static long getDimensionCount(List<Term> terms) {
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
.endsWith(DictWordType.DIMENSION.getType())).count();
}
private static long getMetricCount(List<Term> terms) {
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
.endsWith(DictWordType.METRIC.getType())).count();
}
/**
* Get the number of types of class parts of speech
* modelId -> (nature , natureCount)
*
* @param terms
* @return
*/
public static Map<Long, Map<DictWordType, Integer>> getModelToNatureStat(List<Term> terms) {
Map<Long, Map<DictWordType, Integer>> modelToNature = new HashMap<>();
terms.stream().filter(
term -> term.nature.startsWith(DictWordType.NATURE_SPILT)
).forEach(term -> {
DictWordType dictWordType = DictWordType.getNatureType(String.valueOf(term.nature));
Long model = getModelId(String.valueOf(term.nature));
Map<DictWordType, Integer> natureTypeMap = new HashMap<>();
natureTypeMap.put(dictWordType, 1);
Map<DictWordType, Integer> original = modelToNature.get(model);
if (Objects.isNull(original)) {
modelToNature.put(model, natureTypeMap);
} else {
Integer count = original.get(dictWordType);
if (Objects.isNull(count)) {
count = 1;
} else {
count = count + 1;
}
original.put(dictWordType, count);
}
});
return modelToNature;
}
public static List<Long> selectPossibleModels(List<Term> terms) {
Map<Long, Map<DictWordType, Integer>> modelToNatureStat = getModelToNatureStat(terms);
Integer maxModelTypeSize = modelToNatureStat.entrySet().stream()
.max(Comparator.comparingInt(o -> o.getValue().size())).map(entry -> entry.getValue().size())
.orElse(null);
if (Objects.isNull(maxModelTypeSize) || maxModelTypeSize == 0) {
return new ArrayList<>();
}
return modelToNatureStat.entrySet().stream().filter(entry -> entry.getValue().size() == maxModelTypeSize)
.map(entry -> entry.getKey()).collect(Collectors.toList());
}
}