From 98656eb445afc04e1cf0d53fbda4972c4be6a5a9 Mon Sep 17 00:00:00 2001 From: lexluo09 <39718951+lexluo09@users.noreply.github.com> Date: Wed, 20 Sep 2023 11:21:50 +0800 Subject: [PATCH] (improvement)(project) dsl support get topN dimension/metric by useCount and fix semanticService get dimension/metric useCount error (#105) --- .../chat/config/LLMParserConfig.java | 6 ++ .../chat/parser/llm/dsl/LLMDslParser.java | 35 +++++++-- .../semantic/DefaultSemanticConfig.java | 1 + .../semantic/LocalSemanticLayer.java | 5 +- .../knowledge/service/SchemaService.java | 2 +- .../semantic/api/model/pojo/QueryStat.java | 2 +- .../api/query/request/ItemUseReq.java | 6 ++ .../model/application/ModelServiceImpl.java | 41 +++++----- .../query/service/QueryServiceImpl.java | 5 +- .../query/service/SchemaServiceImpl.java | 5 +- .../semantic/query/utils/StatUtils.java | 78 +++++++++++++++++-- .../src/main/resources/mapper/StatMapper.xml | 6 ++ 12 files changed, 154 insertions(+), 38 deletions(-) diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/config/LLMParserConfig.java b/chat/core/src/main/java/com/tencent/supersonic/chat/config/LLMParserConfig.java index e44029538..6032b798c 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/config/LLMParserConfig.java +++ b/chat/core/src/main/java/com/tencent/supersonic/chat/config/LLMParserConfig.java @@ -16,4 +16,10 @@ public class LLMParserConfig { @Value("${query2sql.path:/query2sql}") private String queryToSqlPath; + @Value("${dimension.topn:5}") + private Integer dimensionTopN; + + @Value("${metric.topn:5}") + private Integer metricTopN; + } diff --git a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/dsl/LLMDslParser.java b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/dsl/LLMDslParser.java index 9d486d6e9..6acceb3d0 100644 --- a/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/dsl/LLMDslParser.java +++ 
b/chat/core/src/main/java/com/tencent/supersonic/chat/parser/llm/dsl/LLMDslParser.java @@ -39,6 +39,7 @@ import com.tencent.supersonic.semantic.api.model.enums.TimeDimensionEnum; import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum; import java.util.ArrayList; import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -87,7 +88,7 @@ public class LLMDslParser implements SemanticParser { return; } - LLMReq llmReq = getLlmReq(queryCtx, modelId); + LLMReq llmReq = getLlmReq(queryCtx, modelId, llmParserConfig); LLMResp llmResp = requestLLM(llmReq, modelId, llmParserConfig); if (Objects.isNull(llmResp)) { @@ -340,22 +341,28 @@ public class LLMDslParser implements SemanticParser { return null; } - private LLMReq getLlmReq(QueryContext queryCtx, Long modelId) { + private LLMReq getLlmReq(QueryContext queryCtx, Long modelId, LLMParserConfig llmParserConfig) { SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema(); Map modelIdToName = semanticSchema.getModelIdToName(); String queryText = queryCtx.getRequest().getQueryText(); + LLMReq llmReq = new LLMReq(); llmReq.setQueryText(queryText); + LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema(); llmSchema.setModelName(modelIdToName.get(modelId)); llmSchema.setDomainName(modelIdToName.get(modelId)); - List fieldNameList = getFieldNameList(queryCtx, modelId, semanticSchema); + + List fieldNameList = getFieldNameList(queryCtx, modelId, semanticSchema, llmParserConfig); + fieldNameList.add(BaseSemanticCorrector.DATE_FIELD); llmSchema.setFieldNameList(fieldNameList); llmReq.setSchema(llmSchema); + List linking = new ArrayList<>(); linking.addAll(getValueList(queryCtx, modelId, semanticSchema)); llmReq.setLinking(linking); + String currentDate = DSLDateHelper.getReferenceDate(modelId); llmReq.setCurrentDate(currentDate); return llmReq; @@ -399,12 +406,27 @@ public class LLMDslParser implements 
SemanticParser { } - protected List getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema) { + protected List getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema, + LLMParserConfig llmParserConfig) { Map itemIdToName = getItemIdToName(modelId, semanticSchema); + Set results = semanticSchema.getDimensions().stream() + .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) + .limit(llmParserConfig.getDimensionTopN()) + .map(entry -> entry.getName()) + .collect(Collectors.toSet()); + + Set metrics = semanticSchema.getMetrics().stream() + .sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed()) + .limit(llmParserConfig.getMetricTopN()) + .map(entry -> entry.getName()) + .collect(Collectors.toSet()); + + results.addAll(metrics); + List matchedElements = queryCtx.getMapInfo().getMatchedElements(modelId); if (CollectionUtils.isEmpty(matchedElements)) { - return new ArrayList<>(); + return new ArrayList<>(results); } Set fieldNameList = matchedElements.stream() .filter(schemaElementMatch -> { @@ -423,7 +445,8 @@ public class LLMDslParser implements SemanticParser { }) .filter(name -> StringUtils.isNotEmpty(name) && !name.contains("%")) .collect(Collectors.toSet()); - return new ArrayList<>(fieldNameList); + results.addAll(fieldNameList); + return new ArrayList<>(results); } protected Map getItemIdToName(Long modelId, SemanticSchema semanticSchema) { diff --git a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/DefaultSemanticConfig.java b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/DefaultSemanticConfig.java index 9038330cf..2152da0b7 100644 --- a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/DefaultSemanticConfig.java +++ b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/DefaultSemanticConfig.java @@ -40,4 +40,5 @@ public class DefaultSemanticConfig { 
@Value("${explain.path:/api/semantic/query/explain}") private String explainPath; + } diff --git a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java index c675a7217..9d2d42ee3 100644 --- a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java +++ b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/semantic/LocalSemanticLayer.java @@ -81,8 +81,9 @@ public class LocalSemanticLayer extends BaseSemanticLayer { public List doFetchModelSchema(List ids) { ModelSchemaFilterReq filter = new ModelSchemaFilterReq(); filter.setModelIds(ids); - modelService = ContextUtils.getBean(ModelService.class); - return modelService.fetchModelSchema(filter); + schemaService = ContextUtils.getBean(SchemaService.class); + User user = User.getFakeUser(); + return schemaService.fetchModelSchema(filter, user); } @Override diff --git a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SchemaService.java b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SchemaService.java index 57256c79b..6bc575e62 100644 --- a/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SchemaService.java +++ b/chat/knowledge/src/main/java/com/tencent/supersonic/knowledge/service/SchemaService.java @@ -18,7 +18,7 @@ public class SchemaService { public static final String ALL_CACHE = "all"; - private static final Integer META_CACHE_TIME = 5; + private static final Integer META_CACHE_TIME = 2; private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer(); private LoadingCache cache = CacheBuilder.newBuilder() diff --git a/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/model/pojo/QueryStat.java b/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/model/pojo/QueryStat.java index f08b126f9..b9819ac69 100644 --- 
a/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/model/pojo/QueryStat.java +++ b/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/model/pojo/QueryStat.java @@ -79,7 +79,7 @@ public class QueryStat { return this; } - public QueryStat setClassId(Long modelId) { + public QueryStat setModelId(Long modelId) { this.modelId = modelId; return this; } diff --git a/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/query/request/ItemUseReq.java b/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/query/request/ItemUseReq.java index d1af76a40..f98a808dc 100644 --- a/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/query/request/ItemUseReq.java +++ b/semantic/api/src/main/java/com/tencent/supersonic/semantic/api/query/request/ItemUseReq.java @@ -1,5 +1,6 @@ package com.tencent.supersonic.semantic.api.query.request; +import java.util.List; import lombok.Data; import lombok.NoArgsConstructor; import lombok.ToString; @@ -11,6 +12,7 @@ public class ItemUseReq { private String startTime; private Long modelId; + private List modelIds; private Boolean cacheEnable = true; private String metric; @@ -18,4 +20,8 @@ public class ItemUseReq { this.startTime = startTime; this.modelId = modelId; } + public ItemUseReq(String startTime, List modelIds) { + this.startTime = startTime; + this.modelIds = modelIds; + } } diff --git a/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/application/ModelServiceImpl.java b/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/application/ModelServiceImpl.java index b10dd9704..e0aaafc76 100644 --- a/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/application/ModelServiceImpl.java +++ b/semantic/model/src/main/java/com/tencent/supersonic/semantic/model/application/ModelServiceImpl.java @@ -4,44 +4,43 @@ import com.alibaba.fastjson.JSONObject; import com.google.common.collect.Lists; import 
com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.auth.api.authentication.service.UserService; +import com.tencent.supersonic.common.pojo.enums.AuthType; import com.tencent.supersonic.common.util.BeanMapper; import com.tencent.supersonic.common.util.JsonUtil; -import com.tencent.supersonic.common.pojo.enums.AuthType; import com.tencent.supersonic.semantic.api.model.request.ModelReq; import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq; import com.tencent.supersonic.semantic.api.model.response.DatabaseResp; -import com.tencent.supersonic.semantic.api.model.response.ModelResp; -import com.tencent.supersonic.semantic.api.model.response.DomainResp; -import com.tencent.supersonic.semantic.api.model.response.DimensionResp; -import com.tencent.supersonic.semantic.api.model.response.MetricResp; -import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp; -import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp; -import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp; import com.tencent.supersonic.semantic.api.model.response.DatasourceResp; +import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp; +import com.tencent.supersonic.semantic.api.model.response.DimensionResp; +import com.tencent.supersonic.semantic.api.model.response.DomainResp; +import com.tencent.supersonic.semantic.api.model.response.MetricResp; +import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp; +import com.tencent.supersonic.semantic.api.model.response.ModelResp; +import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp; import com.tencent.supersonic.semantic.model.domain.DatabaseService; -import com.tencent.supersonic.semantic.model.domain.ModelService; -import com.tencent.supersonic.semantic.model.domain.DomainService; -import com.tencent.supersonic.semantic.model.domain.DimensionService; -import 
com.tencent.supersonic.semantic.model.domain.MetricService; import com.tencent.supersonic.semantic.model.domain.DatasourceService; - +import com.tencent.supersonic.semantic.model.domain.DimensionService; +import com.tencent.supersonic.semantic.model.domain.DomainService; +import com.tencent.supersonic.semantic.model.domain.MetricService; +import com.tencent.supersonic.semantic.model.domain.ModelService; import com.tencent.supersonic.semantic.model.domain.dataobject.ModelDO; import com.tencent.supersonic.semantic.model.domain.pojo.Model; import com.tencent.supersonic.semantic.model.domain.repository.ModelRepository; import com.tencent.supersonic.semantic.model.domain.utils.ModelConvert; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.BeanUtils; import org.springframework.context.annotation.Lazy; import org.springframework.stereotype.Service; import org.springframework.util.CollectionUtils; -import java.util.List; -import java.util.Objects; -import java.util.Date; -import java.util.Set; -import java.util.Map; -import java.util.HashSet; -import java.util.ArrayList; -import java.util.stream.Collectors; @Slf4j @Service diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java index 3c0c0fc11..298a0e897 100644 --- a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/QueryServiceImpl.java @@ -67,8 +67,11 @@ public class QueryServiceImpl implements QueryService { @Override public Object queryBySql(QueryDslReq querySqlCmd, User user) throws Exception { + 
statUtils.initStatInfo(querySqlCmd, user); QueryStatement queryStatement = convertToQueryStatement(querySqlCmd, user); - return semanticQueryEngine.execute(queryStatement); + QueryResultWithSchemaResp results = semanticQueryEngine.execute(queryStatement); + statUtils.statInfo2DbAsync(TaskStatusEnum.SUCCESS); + return results; } private QueryStatement convertToQueryStatement(QueryDslReq querySqlCmd, User user) throws Exception { diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/SchemaServiceImpl.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/SchemaServiceImpl.java index 68058404b..485209c66 100644 --- a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/SchemaServiceImpl.java +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/service/SchemaServiceImpl.java @@ -55,7 +55,10 @@ public class SchemaServiceImpl implements SchemaService { @Override public List fetchModelSchema(ModelSchemaFilterReq filter, User user) { List domainSchemaDescList = modelService.fetchModelSchema(filter); - List statInfos = queryService.getStatInfo(new ItemUseReq()); + ItemUseReq itemUseCommend = new ItemUseReq(); + itemUseCommend.setModelIds(filter.getModelIds()); + + List statInfos = queryService.getStatInfo(itemUseCommend); log.debug("statInfos:{}", statInfos); fillCnt(domainSchemaDescList, statInfos); return domainSchemaDescList; diff --git a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/StatUtils.java b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/StatUtils.java index 0a6c8a5ca..a009750a3 100644 --- a/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/StatUtils.java +++ b/semantic/query/src/main/java/com/tencent/supersonic/semantic/query/utils/StatUtils.java @@ -4,22 +4,30 @@ import com.alibaba.ttl.TransmittableThreadLocal; import com.fasterxml.jackson.core.JsonProcessingException; import 
com.fasterxml.jackson.databind.ObjectMapper; import com.tencent.supersonic.auth.api.authentication.pojo.User; +import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum; +import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper; import com.tencent.supersonic.semantic.api.model.enums.QueryTypeBackEnum; import com.tencent.supersonic.semantic.api.model.enums.QueryTypeEnum; import com.tencent.supersonic.semantic.api.model.pojo.QueryStat; +import com.tencent.supersonic.semantic.api.model.pojo.SchemaItem; +import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp; import com.tencent.supersonic.semantic.api.query.request.ItemUseReq; +import com.tencent.supersonic.semantic.api.query.request.QueryDslReq; import com.tencent.supersonic.semantic.api.query.request.QueryStructReq; import com.tencent.supersonic.semantic.api.query.response.ItemUseResp; -import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum; +import com.tencent.supersonic.semantic.model.domain.ModelService; import com.tencent.supersonic.semantic.query.persistence.repository.StatRepository; import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.commons.codec.digest.DigestUtils; import org.apache.logging.log4j.util.Strings; import org.springframework.stereotype.Component; +import org.springframework.util.CollectionUtils; @Component @Slf4j @@ -28,13 +36,17 @@ public class StatUtils { private static final TransmittableThreadLocal STATS = new TransmittableThreadLocal<>(); private final StatRepository statRepository; private final SqlFilterUtils sqlFilterUtils; + + private final ModelService modelService; private final ObjectMapper objectMapper = new ObjectMapper(); public StatUtils(StatRepository statRepository, - SqlFilterUtils sqlFilterUtils) { + SqlFilterUtils sqlFilterUtils, + ModelService 
modelService) { this.statRepository = statRepository; this.sqlFilterUtils = sqlFilterUtils; + this.modelService = modelService; } public static QueryStat get() { @@ -69,6 +81,44 @@ public class StatUtils { return true; } + + public void initStatInfo(QueryDslReq queryDslReq, User facadeUser) { + QueryStat queryStatInfo = new QueryStat(); + List allFields = SqlParserSelectHelper.getAllFields(queryDslReq.getSql()); + queryStatInfo.setModelId(queryDslReq.getModelId()); + ModelSchemaResp modelSchemaResp = modelService.fetchSingleModelSchema(queryDslReq.getModelId()); + + List dimensions = new ArrayList<>(); + if (Objects.nonNull(modelSchemaResp)) { + dimensions = getFieldNames(allFields, modelSchemaResp.getDimensions()); + } + + List metrics = new ArrayList<>(); + if (Objects.nonNull(modelSchemaResp)) { + metrics = getFieldNames(allFields, modelSchemaResp.getMetrics()); + } + + String userName = getUserName(facadeUser); + try { + queryStatInfo.setTraceId("") + .setModelId(queryDslReq.getModelId()) + .setUser(userName) + .setQueryType(QueryTypeEnum.SQL.getValue()) + .setQueryTypeBack(QueryTypeBackEnum.NORMAL.getState()) + .setQuerySqlCmd(queryDslReq.toString()) + .setQuerySqlCmdMd5(DigestUtils.md5Hex(queryDslReq.toString())) + .setStartTime(System.currentTimeMillis()) + .setUseResultCache(true) + .setUseSqlCache(true) + .setMetrics(objectMapper.writeValueAsString(metrics)) + .setDimensions(objectMapper.writeValueAsString(dimensions)); + } catch (JsonProcessingException e) { + log.error("initStatInfo:{}", e); + } + StatUtils.set(queryStatInfo); + + } + public void initStatInfo(QueryStructReq queryStructCmd, User facadeUser) { QueryStat queryStatInfo = new QueryStat(); String traceId = ""; @@ -76,12 +126,11 @@ public class StatUtils { List metrics = new ArrayList<>(); queryStructCmd.getAggregators().stream().forEach(aggregator -> metrics.add(aggregator.getColumn())); - String user = (Objects.nonNull(facadeUser) && Strings.isNotEmpty(facadeUser.getName())) ? 
facadeUser.getName() - : "Admin"; + String user = getUserName(facadeUser); try { queryStatInfo.setTraceId(traceId) - .setClassId(queryStructCmd.getModelId()) + .setModelId(queryStructCmd.getModelId()) .setUser(user) .setQueryType(QueryTypeEnum.STRUCT.getValue()) .setQueryTypeBack(QueryTypeBackEnum.NORMAL.getState()) @@ -105,6 +154,25 @@ public class StatUtils { } + private List getFieldNames(List allFields, List schemaItems) { + Set fieldNames = schemaItems + .stream() + .map(dimSchemaResp -> dimSchemaResp.getBizName()) + .collect(Collectors.toSet()); + if (!CollectionUtils.isEmpty(fieldNames)) { + return allFields.stream().filter(fieldName -> fieldNames.contains(fieldName)) + .collect(Collectors.toList()); + } + return new ArrayList<>(); + } + + private String getUserName(User facadeUser) { + return (Objects.nonNull(facadeUser) && Strings.isNotEmpty(facadeUser.getName())) ? facadeUser.getName() + : "Admin"; + } + + + public List getStatInfo(ItemUseReq itemUseCommend) { return statRepository.getStatInfo(itemUseCommend); } diff --git a/semantic/query/src/main/resources/mapper/StatMapper.xml b/semantic/query/src/main/resources/mapper/StatMapper.xml index a67f1729d..dec96b377 100644 --- a/semantic/query/src/main/resources/mapper/StatMapper.xml +++ b/semantic/query/src/main/resources/mapper/StatMapper.xml @@ -64,6 +64,12 @@ and model_id = #{modelId} + + and model_id in + + #{id} + + and metrics like concat('%',#{metric},'%')