mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-14 05:43:51 +00:00
[release][project] supersonic 0.7.3 version backend update (#40)
* [improvement] add some features * [improvement] revise CHANGELOG --------- Co-authored-by: zuopengge <hwzuopengge@tencent.com>
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
package com.hankcs.hanlp.collection.trie.bintrie;
|
||||
|
||||
import com.hankcs.hanlp.corpus.io.ByteArray;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.knowledge.service.LoadRemoveService;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.ObjectInput;
|
||||
@@ -8,9 +10,12 @@ import java.io.ObjectOutput;
|
||||
import java.util.AbstractMap;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
@@ -19,6 +24,8 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
* 状态数组,方便读取的时候用
|
||||
*/
|
||||
static final Status[] ARRAY_STATUS = Status.values();
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(BaseNode.class);
|
||||
/**
|
||||
* 子节点
|
||||
*/
|
||||
@@ -279,10 +286,14 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
+ '}';
|
||||
}
|
||||
|
||||
public void walkNode(Set<Map.Entry<String, V>> entrySet) {
|
||||
public void walkNode(Set<Map.Entry<String, V>> entrySet, Integer agentId, Set<Long> detectModelIds) {
|
||||
if (status == Status.WORD_MIDDLE_2 || status == Status.WORD_END_3) {
|
||||
LoadRemoveService loadRemoveService = ContextUtils.getBean(LoadRemoveService.class);
|
||||
logger.debug("agentId:{},detectModelIds:{},before:{}", agentId, detectModelIds, value.toString());
|
||||
List natures = loadRemoveService.removeNatures((List) value, agentId, detectModelIds);
|
||||
String name = this.prefix != null ? this.prefix + c : "" + c;
|
||||
entrySet.add(new TrieEntry(name, value));
|
||||
logger.debug("name:{},after:{},natures:{}", name, (List) value, natures);
|
||||
entrySet.add(new TrieEntry(name, (V) natures));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -292,7 +303,8 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
* @param entrySet
|
||||
* @param limit
|
||||
*/
|
||||
public void walkLimit(StringBuilder sb, Set<Map.Entry<String, V>> entrySet, int limit) {
|
||||
public void walkLimit(StringBuilder sb, Set<Map.Entry<String, V>> entrySet, int limit, Integer agentId,
|
||||
Set<Long> detectModelIds) {
|
||||
Queue<BaseNode> queue = new ArrayDeque<>();
|
||||
this.prefix = sb.toString();
|
||||
queue.add(this);
|
||||
@@ -304,7 +316,7 @@ public abstract class BaseNode<V> implements Comparable<BaseNode> {
|
||||
if (root == null) {
|
||||
continue;
|
||||
}
|
||||
root.walkNode(entrySet);
|
||||
root.walkNode(entrySet, agentId, detectModelIds);
|
||||
if (root.child == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
package com.tencent.supersonic.knowledge.dictionary;
|
||||
|
||||
import java.io.Serializable;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.ToString;
|
||||
|
||||
@Data
|
||||
@ToString
|
||||
@Builder
|
||||
public class ModelInfoStat implements Serializable {
|
||||
|
||||
private long modelCount;
|
||||
|
||||
private long metricModelCount;
|
||||
|
||||
private long dimensionModelCount;
|
||||
|
||||
private long dimensionValueModelCount;
|
||||
|
||||
}
|
||||
@@ -23,4 +23,4 @@ public class WordBuilderFactory {
|
||||
public static BaseWordBuilder get(DictWordType strategyType) {
|
||||
return wordNatures.get(strategyType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,12 @@ import com.tencent.supersonic.common.pojo.enums.AuthType;
|
||||
import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
|
||||
import com.tencent.supersonic.semantic.api.model.response.*;
|
||||
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
|
||||
@@ -31,7 +36,7 @@ public class LocalSemanticLayer extends BaseSemanticLayer {
|
||||
|
||||
@SneakyThrows
|
||||
@Override
|
||||
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user){
|
||||
public QueryResultWithSchemaResp queryByStruct(QueryStructReq queryStructReq, User user) {
|
||||
QueryService queryService = ContextUtils.getBean(QueryService.class);
|
||||
return queryService.queryByStructWithAuth(queryStructReq, user);
|
||||
}
|
||||
|
||||
@@ -12,7 +12,13 @@ import org.apache.logging.log4j.util.Strings;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class ModelSchemaBuilder {
|
||||
|
||||
@@ -1,43 +1,37 @@
|
||||
package com.tencent.supersonic.knowledge.semantic;
|
||||
|
||||
import static com.tencent.supersonic.common.pojo.Constants.LIST_LOWER;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.PAGESIZE_LOWER;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.TOTAL_LOWER;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.TRUE_LOWER;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.github.pagehelper.PageInfo;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.gson.Gson;
|
||||
import com.tencent.supersonic.auth.api.authentication.config.AuthenticationConfig;
|
||||
import com.tencent.supersonic.auth.api.authentication.constant.UserConstants;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.ResultData;
|
||||
import com.tencent.supersonic.common.pojo.ReturnCode;
|
||||
import com.tencent.supersonic.common.pojo.enums.AuthType;
|
||||
import com.tencent.supersonic.common.pojo.exception.CommonException;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.common.util.S2ThreadContext;
|
||||
import com.tencent.supersonic.common.util.ThreadContext;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.common.pojo.enums.AuthType;
|
||||
import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageDimensionReq;
|
||||
import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
|
||||
import com.tencent.supersonic.common.pojo.exception.CommonException;
|
||||
import com.tencent.supersonic.common.pojo.ResultData;
|
||||
import com.tencent.supersonic.common.pojo.ReturnCode;
|
||||
|
||||
import java.net.URI;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.LinkedHashMap;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
@@ -50,6 +44,11 @@ import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.util.UriComponentsBuilder;
|
||||
|
||||
import static com.tencent.supersonic.common.pojo.Constants.TRUE_LOWER;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.LIST_LOWER;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.TOTAL_LOWER;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.PAGESIZE_LOWER;
|
||||
|
||||
@Slf4j
|
||||
public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
|
||||
@@ -57,8 +56,6 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
|
||||
private AuthenticationConfig authenticationConfig;
|
||||
|
||||
private static final Cache<String, List<ModelSchemaResp>> domainSchemaCache =
|
||||
CacheBuilder.newBuilder().expireAfterWrite(10, TimeUnit.SECONDS).build();
|
||||
private ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>> structTypeRef =
|
||||
new ParameterizedTypeReference<ResultData<QueryResultWithSchemaResp>>() {
|
||||
};
|
||||
@@ -125,7 +122,7 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
|
||||
URI requestUrl = UriComponentsBuilder.fromHttpUrl(
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelSchemaPath()).build()
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelSchemaPath()).build()
|
||||
.encode().toUri();
|
||||
ModelSchemaFilterReq filter = new ModelSchemaFilterReq();
|
||||
filter.setModelIds(ids);
|
||||
@@ -155,8 +152,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
public List<DomainResp> getDomainList(User user) {
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
Object domainDescListObject = fetchHttpResult(
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainListPath(), null,
|
||||
HttpMethod.GET);
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDomainListPath(),
|
||||
null, HttpMethod.GET);
|
||||
return JsonUtil.toList(JsonUtil.toString(domainDescListObject), DomainResp.class);
|
||||
}
|
||||
|
||||
@@ -167,8 +164,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
}
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
String url = String.format("%s?domainId=%s&authType=%s",
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelListPath()
|
||||
, domainId, authType.toString());
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchModelListPath(),
|
||||
domainId, authType.toString());
|
||||
Object domainDescListObject = fetchHttpResult(url, null, HttpMethod.GET);
|
||||
return JsonUtil.toList(JsonUtil.toString(domainDescListObject), ModelResp.class);
|
||||
}
|
||||
@@ -218,8 +215,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
log.info("url:{}", defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath());
|
||||
Object dimensionListObject = fetchHttpResult(
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath(), body,
|
||||
HttpMethod.POST);
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchMetricPagePath(),
|
||||
body, HttpMethod.POST);
|
||||
LinkedHashMap map = (LinkedHashMap) dimensionListObject;
|
||||
PageInfo<Object> metricDescObjectPageInfo = generatePageInfo(map);
|
||||
PageInfo<MetricResp> metricDescPageInfo = new PageInfo<>();
|
||||
@@ -233,8 +230,8 @@ public class RemoteSemanticLayer extends BaseSemanticLayer {
|
||||
String body = JsonUtil.toString(pageDimensionCmd);
|
||||
DefaultSemanticConfig defaultSemanticConfig = ContextUtils.getBean(DefaultSemanticConfig.class);
|
||||
Object dimensionListObject = fetchHttpResult(
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDimensionPagePath(), body,
|
||||
HttpMethod.POST);
|
||||
defaultSemanticConfig.getSemanticUrl() + defaultSemanticConfig.getFetchDimensionPagePath(),
|
||||
body, HttpMethod.POST);
|
||||
LinkedHashMap map = (LinkedHashMap) dimensionListObject;
|
||||
PageInfo<Object> dimensionDescObjectPageInfo = generatePageInfo(map);
|
||||
PageInfo<DimensionResp> dimensionDescPageInfo = new PageInfo<>();
|
||||
|
||||
@@ -0,0 +1,55 @@
|
||||
package com.tencent.supersonic.knowledge.service;
|
||||
|
||||
import com.tencent.supersonic.knowledge.utils.NatureHelper;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import lombok.Data;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Data
|
||||
@Service
|
||||
public class LoadRemoveService {
|
||||
|
||||
@Value("${mapper.remove.agentId:}")
|
||||
private Integer mapperRemoveAgentId;
|
||||
|
||||
@Value("${mapper.remove.nature.prefix:}")
|
||||
private String mapperRemoveNaturePrefix;
|
||||
|
||||
|
||||
public List removeNatures(List value, Integer agentId, Set<Long> detectModelIds) {
|
||||
if (CollectionUtils.isEmpty(value)) {
|
||||
return value;
|
||||
}
|
||||
List<String> resultList = new ArrayList<>(value);
|
||||
if (!CollectionUtils.isEmpty(detectModelIds)) {
|
||||
resultList.removeIf(nature -> {
|
||||
if (Objects.isNull(nature)) {
|
||||
return false;
|
||||
}
|
||||
Long modelId = NatureHelper.getModelId(nature);
|
||||
if (Objects.nonNull(modelId)) {
|
||||
return !detectModelIds.contains(modelId);
|
||||
}
|
||||
return false;
|
||||
});
|
||||
}
|
||||
if (Objects.nonNull(mapperRemoveAgentId)
|
||||
&& mapperRemoveAgentId.equals(agentId)
|
||||
&& StringUtils.isNotBlank(mapperRemoveNaturePrefix)) {
|
||||
resultList.removeIf(nature -> {
|
||||
if (Objects.isNull(nature)) {
|
||||
return false;
|
||||
}
|
||||
return nature.startsWith(mapperRemoveNaturePrefix);
|
||||
});
|
||||
}
|
||||
return resultList;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -15,9 +15,9 @@ import org.springframework.stereotype.Service;
|
||||
@Slf4j
|
||||
public class SchemaService {
|
||||
|
||||
private static final Integer META_CACHE_TIME = 5;
|
||||
public static final String ALL_CACHE = "all";
|
||||
|
||||
public static final String ALL_CACHE = "all";
|
||||
private static final Integer META_CACHE_TIME = 5;
|
||||
private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
|
||||
|
||||
private LoadingCache<String, SemanticSchema> cache = CacheBuilder.newBuilder()
|
||||
|
||||
@@ -27,7 +27,6 @@ public class SearchService {
|
||||
public static final int SEARCH_SIZE = 200;
|
||||
private static BinTrie<List<String>> trie;
|
||||
private static BinTrie<List<String>> suffixTrie;
|
||||
private static String localFileCache = "";
|
||||
|
||||
static {
|
||||
trie = new BinTrie<>();
|
||||
@@ -39,16 +38,13 @@ public class SearchService {
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
public static List<MapResult> prefixSearch(String key) {
|
||||
return prefixSearch(key, SEARCH_SIZE, trie);
|
||||
public static List<MapResult> prefixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
|
||||
return prefixSearch(key, limit, agentId, trie, detectModelIds);
|
||||
}
|
||||
|
||||
public static List<MapResult> prefixSearch(String key, int limit) {
|
||||
return prefixSearch(key, limit, trie);
|
||||
}
|
||||
|
||||
public static List<MapResult> prefixSearch(String key, int limit, BinTrie<List<String>> binTrie) {
|
||||
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie);
|
||||
public static List<MapResult> prefixSearch(String key, int limit, Integer agentId, BinTrie<List<String>> binTrie,
|
||||
Set<Long> detectModelIds) {
|
||||
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, agentId, detectModelIds);
|
||||
return result.stream().map(
|
||||
entry -> {
|
||||
String name = entry.getKey().replace("#", " ");
|
||||
@@ -64,13 +60,14 @@ public class SearchService {
|
||||
* @param key
|
||||
* @return
|
||||
*/
|
||||
public static List<MapResult> suffixSearch(String key, int limit) {
|
||||
public static List<MapResult> suffixSearch(String key, int limit, Integer agentId, Set<Long> detectModelIds) {
|
||||
String reverseDetectSegment = StringUtils.reverse(key);
|
||||
return suffixSearch(reverseDetectSegment, limit, suffixTrie);
|
||||
return suffixSearch(reverseDetectSegment, limit, agentId, suffixTrie, detectModelIds);
|
||||
}
|
||||
|
||||
public static List<MapResult> suffixSearch(String key, int limit, BinTrie<List<String>> binTrie) {
|
||||
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie);
|
||||
public static List<MapResult> suffixSearch(String key, int limit, Integer agentId, BinTrie<List<String>> binTrie,
|
||||
Set<Long> detectModelIds) {
|
||||
Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, agentId, detectModelIds);
|
||||
return result.stream().map(
|
||||
entry -> {
|
||||
String name = entry.getKey().replace("#", " ");
|
||||
@@ -86,7 +83,7 @@ public class SearchService {
|
||||
}
|
||||
|
||||
private static Set<Map.Entry<String, List<String>>> prefixSearchLimit(String key, int limit,
|
||||
BinTrie<List<String>> binTrie) {
|
||||
BinTrie<List<String>> binTrie, Integer agentId, Set<Long> detectModelIds) {
|
||||
key = key.toLowerCase();
|
||||
Set<Map.Entry<String, List<String>>> entrySet = new TreeSet<Map.Entry<String, List<String>>>();
|
||||
StringBuilder sb = new StringBuilder(key.substring(0, key.length() - 1));
|
||||
@@ -102,7 +99,7 @@ public class SearchService {
|
||||
if (branch == null) {
|
||||
return entrySet;
|
||||
}
|
||||
branch.walkLimit(sb, entrySet, limit);
|
||||
branch.walkLimit(sb, entrySet, limit, agentId, detectModelIds);
|
||||
return entrySet;
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,165 @@
|
||||
package com.tencent.supersonic.knowledge.utils;
|
||||
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
|
||||
import com.tencent.supersonic.knowledge.dictionary.ModelInfoStat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/**
|
||||
* nature parse helper
|
||||
*/
|
||||
@Slf4j
|
||||
public class NatureHelper {
|
||||
|
||||
public static SchemaElementType convertToElementType(String nature) {
|
||||
DictWordType dictWordType = DictWordType.getNatureType(nature);
|
||||
SchemaElementType result = null;
|
||||
switch (dictWordType) {
|
||||
case METRIC:
|
||||
result = SchemaElementType.METRIC;
|
||||
break;
|
||||
case DIMENSION:
|
||||
result = SchemaElementType.DIMENSION;
|
||||
break;
|
||||
case ENTITY:
|
||||
result = SchemaElementType.ENTITY;
|
||||
break;
|
||||
case MODEL:
|
||||
result = SchemaElementType.MODEL;
|
||||
break;
|
||||
case VALUE:
|
||||
result = SchemaElementType.VALUE;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static boolean isModelOrEntity(Term term, Integer model) {
|
||||
return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString()
|
||||
.endsWith(DictWordType.ENTITY.getType());
|
||||
}
|
||||
|
||||
public static Integer getModelByNature(Nature nature) {
|
||||
if (nature.startsWith(DictWordType.NATURE_SPILT)) {
|
||||
String[] dimensionValues = nature.toString().split(DictWordType.NATURE_SPILT);
|
||||
if (StringUtils.isNumeric(dimensionValues[1])) {
|
||||
return Integer.valueOf(dimensionValues[1]);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public static Long getModelId(String nature) {
|
||||
try {
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length <= 1) {
|
||||
return null;
|
||||
}
|
||||
return Long.valueOf(split[1]);
|
||||
} catch (NumberFormatException e) {
|
||||
log.error("", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static boolean isDimensionValueModelId(String nature) {
|
||||
if (StringUtils.isEmpty(nature)) {
|
||||
return false;
|
||||
}
|
||||
if (!nature.startsWith(DictWordType.NATURE_SPILT)) {
|
||||
return false;
|
||||
}
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length <= 1) {
|
||||
return false;
|
||||
}
|
||||
return !nature.endsWith(DictWordType.METRIC.getType()) && !nature.endsWith(DictWordType.DIMENSION.getType())
|
||||
&& StringUtils.isNumeric(split[1]);
|
||||
}
|
||||
|
||||
public static ModelInfoStat getModelStat(List<Term> terms) {
|
||||
return ModelInfoStat.builder()
|
||||
.modelCount(getModelCount(terms))
|
||||
.dimensionModelCount(getDimensionCount(terms))
|
||||
.metricModelCount(getMetricCount(terms))
|
||||
.dimensionValueModelCount(getDimensionValueCount(terms))
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
private static long getModelCount(List<Term> terms) {
|
||||
return terms.stream().filter(term -> isModelOrEntity(term, getModelByNature(term.nature))).count();
|
||||
}
|
||||
|
||||
private static long getDimensionValueCount(List<Term> terms) {
|
||||
return terms.stream().filter(term -> isDimensionValueModelId(term.nature.toString())).count();
|
||||
}
|
||||
|
||||
private static long getDimensionCount(List<Term> terms) {
|
||||
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
||||
.endsWith(DictWordType.DIMENSION.getType())).count();
|
||||
}
|
||||
|
||||
private static long getMetricCount(List<Term> terms) {
|
||||
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
||||
.endsWith(DictWordType.METRIC.getType())).count();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of types of class parts of speech
|
||||
* modelId -> (nature , natureCount)
|
||||
*
|
||||
* @param terms
|
||||
* @return
|
||||
*/
|
||||
public static Map<Long, Map<DictWordType, Integer>> getModelToNatureStat(List<Term> terms) {
|
||||
Map<Long, Map<DictWordType, Integer>> modelToNature = new HashMap<>();
|
||||
terms.stream().filter(
|
||||
term -> term.nature.startsWith(DictWordType.NATURE_SPILT)
|
||||
).forEach(term -> {
|
||||
DictWordType dictWordType = DictWordType.getNatureType(String.valueOf(term.nature));
|
||||
Long model = getModelId(String.valueOf(term.nature));
|
||||
|
||||
Map<DictWordType, Integer> natureTypeMap = new HashMap<>();
|
||||
natureTypeMap.put(dictWordType, 1);
|
||||
|
||||
Map<DictWordType, Integer> original = modelToNature.get(model);
|
||||
if (Objects.isNull(original)) {
|
||||
modelToNature.put(model, natureTypeMap);
|
||||
} else {
|
||||
Integer count = original.get(dictWordType);
|
||||
if (Objects.isNull(count)) {
|
||||
count = 1;
|
||||
} else {
|
||||
count = count + 1;
|
||||
}
|
||||
original.put(dictWordType, count);
|
||||
}
|
||||
});
|
||||
return modelToNature;
|
||||
}
|
||||
|
||||
public static List<Long> selectPossibleModels(List<Term> terms) {
|
||||
Map<Long, Map<DictWordType, Integer>> modelToNatureStat = getModelToNatureStat(terms);
|
||||
Integer maxModelTypeSize = modelToNatureStat.entrySet().stream()
|
||||
.max(Comparator.comparingInt(o -> o.getValue().size())).map(entry -> entry.getValue().size())
|
||||
.orElse(null);
|
||||
if (Objects.isNull(maxModelTypeSize) || maxModelTypeSize == 0) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
return modelToNatureStat.entrySet().stream().filter(entry -> entry.getValue().size() == maxModelTypeSize)
|
||||
.map(entry -> entry.getKey()).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user