(improvement)(project) dsl support get topN dimension/metric by useCount and fix semanticService get dimension/metric usrCount error (#105)

This commit is contained in:
lexluo09
2023-09-20 11:21:50 +08:00
committed by GitHub
parent c8ff37e304
commit 98656eb445
12 changed files with 154 additions and 38 deletions

View File

@@ -16,4 +16,10 @@ public class LLMParserConfig {
@Value("${query2sql.path:/query2sql}")
private String queryToSqlPath;
@Value("${dimension.topn:5}")
private Integer dimensionTopN;
@Value("${metric.topn:5}")
private Integer metricTopN;
}

View File

@@ -39,6 +39,7 @@ import com.tencent.supersonic.semantic.api.model.enums.TimeDimensionEnum;
import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -87,7 +88,7 @@ public class LLMDslParser implements SemanticParser {
return;
}
LLMReq llmReq = getLlmReq(queryCtx, modelId);
LLMReq llmReq = getLlmReq(queryCtx, modelId, llmParserConfig);
LLMResp llmResp = requestLLM(llmReq, modelId, llmParserConfig);
if (Objects.isNull(llmResp)) {
@@ -340,22 +341,28 @@ public class LLMDslParser implements SemanticParser {
return null;
}
private LLMReq getLlmReq(QueryContext queryCtx, Long modelId) {
private LLMReq getLlmReq(QueryContext queryCtx, Long modelId, LLMParserConfig llmParserConfig) {
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
Map<Long, String> modelIdToName = semanticSchema.getModelIdToName();
String queryText = queryCtx.getRequest().getQueryText();
LLMReq llmReq = new LLMReq();
llmReq.setQueryText(queryText);
LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema();
llmSchema.setModelName(modelIdToName.get(modelId));
llmSchema.setDomainName(modelIdToName.get(modelId));
List<String> fieldNameList = getFieldNameList(queryCtx, modelId, semanticSchema);
List<String> fieldNameList = getFieldNameList(queryCtx, modelId, semanticSchema, llmParserConfig);
fieldNameList.add(BaseSemanticCorrector.DATE_FIELD);
llmSchema.setFieldNameList(fieldNameList);
llmReq.setSchema(llmSchema);
List<ElementValue> linking = new ArrayList<>();
linking.addAll(getValueList(queryCtx, modelId, semanticSchema));
llmReq.setLinking(linking);
String currentDate = DSLDateHelper.getReferenceDate(modelId);
llmReq.setCurrentDate(currentDate);
return llmReq;
@@ -399,12 +406,27 @@ public class LLMDslParser implements SemanticParser {
}
protected List<String> getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema) {
protected List<String> getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema,
LLMParserConfig llmParserConfig) {
Map<Long, String> itemIdToName = getItemIdToName(modelId, semanticSchema);
Set<String> results = semanticSchema.getDimensions().stream()
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.limit(llmParserConfig.getDimensionTopN())
.map(entry -> entry.getName())
.collect(Collectors.toSet());
Set<String> metrics = semanticSchema.getMetrics().stream()
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.limit(llmParserConfig.getMetricTopN())
.map(entry -> entry.getName())
.collect(Collectors.toSet());
results.addAll(metrics);
List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(modelId);
if (CollectionUtils.isEmpty(matchedElements)) {
return new ArrayList<>();
return new ArrayList<>(results);
}
Set<String> fieldNameList = matchedElements.stream()
.filter(schemaElementMatch -> {
@@ -423,7 +445,8 @@ public class LLMDslParser implements SemanticParser {
})
.filter(name -> StringUtils.isNotEmpty(name) && !name.contains("%"))
.collect(Collectors.toSet());
return new ArrayList<>(fieldNameList);
results.addAll(fieldNameList);
return new ArrayList<>(results);
}
protected Map<Long, String> getItemIdToName(Long modelId, SemanticSchema semanticSchema) {

View File

@@ -40,4 +40,5 @@ public class DefaultSemanticConfig {
@Value("${explain.path:/api/semantic/query/explain}")
private String explainPath;
}

View File

@@ -81,8 +81,9 @@ public class LocalSemanticLayer extends BaseSemanticLayer {
public List<ModelSchemaResp> doFetchModelSchema(List<Long> ids) {
ModelSchemaFilterReq filter = new ModelSchemaFilterReq();
filter.setModelIds(ids);
modelService = ContextUtils.getBean(ModelService.class);
return modelService.fetchModelSchema(filter);
schemaService = ContextUtils.getBean(SchemaService.class);
User user = User.getFakeUser();
return schemaService.fetchModelSchema(filter, user);
}
@Override

View File

@@ -18,7 +18,7 @@ public class SchemaService {
public static final String ALL_CACHE = "all";
private static final Integer META_CACHE_TIME = 5;
private static final Integer META_CACHE_TIME = 2;
private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
private LoadingCache<String, SemanticSchema> cache = CacheBuilder.newBuilder()

View File

@@ -79,7 +79,7 @@ public class QueryStat {
return this;
}
public QueryStat setClassId(Long modelId) {
public QueryStat setModelId(Long modelId) {
this.modelId = modelId;
return this;
}

View File

@@ -1,5 +1,6 @@
package com.tencent.supersonic.semantic.api.query.request;
import java.util.List;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.ToString;
@@ -11,6 +12,7 @@ public class ItemUseReq {
private String startTime;
private Long modelId;
private List<Long> modelIds;
private Boolean cacheEnable = true;
private String metric;
@@ -18,4 +20,8 @@ public class ItemUseReq {
this.startTime = startTime;
this.modelId = modelId;
}
public ItemUseReq(String startTime, List<Long> modelIds) {
this.startTime = startTime;
this.modelIds = modelIds;
}
}

View File

@@ -4,44 +4,43 @@ import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.auth.api.authentication.service.UserService;
import com.tencent.supersonic.common.pojo.enums.AuthType;
import com.tencent.supersonic.common.util.BeanMapper;
import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.common.pojo.enums.AuthType;
import com.tencent.supersonic.semantic.api.model.request.ModelReq;
import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq;
import com.tencent.supersonic.semantic.api.model.response.DatabaseResp;
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.DatasourceResp;
import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.model.domain.DatabaseService;
import com.tencent.supersonic.semantic.model.domain.ModelService;
import com.tencent.supersonic.semantic.model.domain.DomainService;
import com.tencent.supersonic.semantic.model.domain.DimensionService;
import com.tencent.supersonic.semantic.model.domain.MetricService;
import com.tencent.supersonic.semantic.model.domain.DatasourceService;
import com.tencent.supersonic.semantic.model.domain.DimensionService;
import com.tencent.supersonic.semantic.model.domain.DomainService;
import com.tencent.supersonic.semantic.model.domain.MetricService;
import com.tencent.supersonic.semantic.model.domain.ModelService;
import com.tencent.supersonic.semantic.model.domain.dataobject.ModelDO;
import com.tencent.supersonic.semantic.model.domain.pojo.Model;
import com.tencent.supersonic.semantic.model.domain.repository.ModelRepository;
import com.tencent.supersonic.semantic.model.domain.utils.ModelConvert;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.Objects;
import java.util.Date;
import java.util.Set;
import java.util.Map;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.stream.Collectors;
@Slf4j
@Service

View File

@@ -67,8 +67,11 @@ public class QueryServiceImpl implements QueryService {
@Override
public Object queryBySql(QueryDslReq querySqlCmd, User user) throws Exception {
statUtils.initStatInfo(querySqlCmd, user);
QueryStatement queryStatement = convertToQueryStatement(querySqlCmd, user);
return semanticQueryEngine.execute(queryStatement);
QueryResultWithSchemaResp results = semanticQueryEngine.execute(queryStatement);
statUtils.statInfo2DbAsync(TaskStatusEnum.SUCCESS);
return results;
}
private QueryStatement convertToQueryStatement(QueryDslReq querySqlCmd, User user) throws Exception {

View File

@@ -55,7 +55,10 @@ public class SchemaServiceImpl implements SchemaService {
@Override
public List<ModelSchemaResp> fetchModelSchema(ModelSchemaFilterReq filter, User user) {
List<ModelSchemaResp> domainSchemaDescList = modelService.fetchModelSchema(filter);
List<ItemUseResp> statInfos = queryService.getStatInfo(new ItemUseReq());
ItemUseReq itemUseCommend = new ItemUseReq();
itemUseCommend.setModelIds(filter.getModelIds());
List<ItemUseResp> statInfos = queryService.getStatInfo(itemUseCommend);
log.debug("statInfos:{}", statInfos);
fillCnt(domainSchemaDescList, statInfos);
return domainSchemaDescList;

View File

@@ -4,22 +4,30 @@ import com.alibaba.ttl.TransmittableThreadLocal;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
import com.tencent.supersonic.semantic.api.model.enums.QueryTypeBackEnum;
import com.tencent.supersonic.semantic.api.model.enums.QueryTypeEnum;
import com.tencent.supersonic.semantic.api.model.pojo.QueryStat;
import com.tencent.supersonic.semantic.api.model.pojo.SchemaItem;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.query.request.ItemUseReq;
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
import com.tencent.supersonic.semantic.api.query.response.ItemUseResp;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.semantic.model.domain.ModelService;
import com.tencent.supersonic.semantic.query.persistence.repository.StatRepository;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.logging.log4j.util.Strings;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
@Component
@Slf4j
@@ -28,13 +36,17 @@ public class StatUtils {
private static final TransmittableThreadLocal<QueryStat> STATS = new TransmittableThreadLocal<>();
private final StatRepository statRepository;
private final SqlFilterUtils sqlFilterUtils;
private final ModelService modelService;
private final ObjectMapper objectMapper = new ObjectMapper();
public StatUtils(StatRepository statRepository,
SqlFilterUtils sqlFilterUtils) {
SqlFilterUtils sqlFilterUtils,
ModelService modelService) {
this.statRepository = statRepository;
this.sqlFilterUtils = sqlFilterUtils;
this.modelService = modelService;
}
public static QueryStat get() {
@@ -69,6 +81,44 @@ public class StatUtils {
return true;
}
public void initStatInfo(QueryDslReq queryDslReq, User facadeUser) {
QueryStat queryStatInfo = new QueryStat();
List<String> allFields = SqlParserSelectHelper.getAllFields(queryDslReq.getSql());
queryStatInfo.setModelId(queryDslReq.getModelId());
ModelSchemaResp modelSchemaResp = modelService.fetchSingleModelSchema(queryDslReq.getModelId());
List<String> dimensions = new ArrayList<>();
if (Objects.nonNull(modelSchemaResp)) {
dimensions = getFieldNames(allFields, modelSchemaResp.getDimensions());
}
List<String> metrics = new ArrayList<>();
if (Objects.nonNull(modelSchemaResp)) {
metrics = getFieldNames(allFields, modelSchemaResp.getMetrics());
}
String userName = getUserName(facadeUser);
try {
queryStatInfo.setTraceId("")
.setModelId(queryDslReq.getModelId())
.setUser(userName)
.setQueryType(QueryTypeEnum.SQL.getValue())
.setQueryTypeBack(QueryTypeBackEnum.NORMAL.getState())
.setQuerySqlCmd(queryDslReq.toString())
.setQuerySqlCmdMd5(DigestUtils.md5Hex(queryDslReq.toString()))
.setStartTime(System.currentTimeMillis())
.setUseResultCache(true)
.setUseSqlCache(true)
.setMetrics(objectMapper.writeValueAsString(metrics))
.setDimensions(objectMapper.writeValueAsString(dimensions));
} catch (JsonProcessingException e) {
log.error("initStatInfo:{}", e);
}
StatUtils.set(queryStatInfo);
}
public void initStatInfo(QueryStructReq queryStructCmd, User facadeUser) {
QueryStat queryStatInfo = new QueryStat();
String traceId = "";
@@ -76,12 +126,11 @@ public class StatUtils {
List<String> metrics = new ArrayList<>();
queryStructCmd.getAggregators().stream().forEach(aggregator -> metrics.add(aggregator.getColumn()));
String user = (Objects.nonNull(facadeUser) && Strings.isNotEmpty(facadeUser.getName())) ? facadeUser.getName()
: "Admin";
String user = getUserName(facadeUser);
try {
queryStatInfo.setTraceId(traceId)
.setClassId(queryStructCmd.getModelId())
.setModelId(queryStructCmd.getModelId())
.setUser(user)
.setQueryType(QueryTypeEnum.STRUCT.getValue())
.setQueryTypeBack(QueryTypeBackEnum.NORMAL.getState())
@@ -105,6 +154,25 @@ public class StatUtils {
}
private List<String> getFieldNames(List<String> allFields, List<? extends SchemaItem> schemaItems) {
Set<String> fieldNames = schemaItems
.stream()
.map(dimSchemaResp -> dimSchemaResp.getBizName())
.collect(Collectors.toSet());
if (!CollectionUtils.isEmpty(fieldNames)) {
return allFields.stream().filter(fieldName -> fieldNames.contains(fieldName))
.collect(Collectors.toList());
}
return new ArrayList<>();
}
private String getUserName(User facadeUser) {
return (Objects.nonNull(facadeUser) && Strings.isNotEmpty(facadeUser.getName())) ? facadeUser.getName()
: "Admin";
}
public List<ItemUseResp> getStatInfo(ItemUseReq itemUseCommend) {
return statRepository.getStatInfo(itemUseCommend);
}

View File

@@ -64,6 +64,12 @@
<if test="modelId != null">
and model_id = #{modelId}
</if>
<if test="modelIds != null and modelIds.size() > 0">
and model_id in
<foreach item="id" collection="modelIds" open="(" separator="," close=")">
#{id}
</foreach>
</if>
<if test="metric != null">
and metrics like concat('%',#{metric},'%')
</if>