(improvement)(project) dsl support get topN dimension/metric by useCount and fix semanticService get dimension/metric usrCount error (#105)

This commit is contained in:
lexluo09
2023-09-20 11:21:50 +08:00
committed by GitHub
parent c8ff37e304
commit 98656eb445
12 changed files with 154 additions and 38 deletions

View File

@@ -16,4 +16,10 @@ public class LLMParserConfig {
@Value("${query2sql.path:/query2sql}") @Value("${query2sql.path:/query2sql}")
private String queryToSqlPath; private String queryToSqlPath;
@Value("${dimension.topn:5}")
private Integer dimensionTopN;
@Value("${metric.topn:5}")
private Integer metricTopN;
} }

View File

@@ -39,6 +39,7 @@ import com.tencent.supersonic.semantic.api.model.enums.TimeDimensionEnum;
import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum; import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@@ -87,7 +88,7 @@ public class LLMDslParser implements SemanticParser {
return; return;
} }
LLMReq llmReq = getLlmReq(queryCtx, modelId); LLMReq llmReq = getLlmReq(queryCtx, modelId, llmParserConfig);
LLMResp llmResp = requestLLM(llmReq, modelId, llmParserConfig); LLMResp llmResp = requestLLM(llmReq, modelId, llmParserConfig);
if (Objects.isNull(llmResp)) { if (Objects.isNull(llmResp)) {
@@ -340,22 +341,28 @@ public class LLMDslParser implements SemanticParser {
return null; return null;
} }
private LLMReq getLlmReq(QueryContext queryCtx, Long modelId) { private LLMReq getLlmReq(QueryContext queryCtx, Long modelId, LLMParserConfig llmParserConfig) {
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema(); SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
Map<Long, String> modelIdToName = semanticSchema.getModelIdToName(); Map<Long, String> modelIdToName = semanticSchema.getModelIdToName();
String queryText = queryCtx.getRequest().getQueryText(); String queryText = queryCtx.getRequest().getQueryText();
LLMReq llmReq = new LLMReq(); LLMReq llmReq = new LLMReq();
llmReq.setQueryText(queryText); llmReq.setQueryText(queryText);
LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema(); LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema();
llmSchema.setModelName(modelIdToName.get(modelId)); llmSchema.setModelName(modelIdToName.get(modelId));
llmSchema.setDomainName(modelIdToName.get(modelId)); llmSchema.setDomainName(modelIdToName.get(modelId));
List<String> fieldNameList = getFieldNameList(queryCtx, modelId, semanticSchema);
List<String> fieldNameList = getFieldNameList(queryCtx, modelId, semanticSchema, llmParserConfig);
fieldNameList.add(BaseSemanticCorrector.DATE_FIELD); fieldNameList.add(BaseSemanticCorrector.DATE_FIELD);
llmSchema.setFieldNameList(fieldNameList); llmSchema.setFieldNameList(fieldNameList);
llmReq.setSchema(llmSchema); llmReq.setSchema(llmSchema);
List<ElementValue> linking = new ArrayList<>(); List<ElementValue> linking = new ArrayList<>();
linking.addAll(getValueList(queryCtx, modelId, semanticSchema)); linking.addAll(getValueList(queryCtx, modelId, semanticSchema));
llmReq.setLinking(linking); llmReq.setLinking(linking);
String currentDate = DSLDateHelper.getReferenceDate(modelId); String currentDate = DSLDateHelper.getReferenceDate(modelId);
llmReq.setCurrentDate(currentDate); llmReq.setCurrentDate(currentDate);
return llmReq; return llmReq;
@@ -399,12 +406,27 @@ public class LLMDslParser implements SemanticParser {
} }
protected List<String> getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema) { protected List<String> getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema,
LLMParserConfig llmParserConfig) {
Map<Long, String> itemIdToName = getItemIdToName(modelId, semanticSchema); Map<Long, String> itemIdToName = getItemIdToName(modelId, semanticSchema);
Set<String> results = semanticSchema.getDimensions().stream()
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.limit(llmParserConfig.getDimensionTopN())
.map(entry -> entry.getName())
.collect(Collectors.toSet());
Set<String> metrics = semanticSchema.getMetrics().stream()
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.limit(llmParserConfig.getMetricTopN())
.map(entry -> entry.getName())
.collect(Collectors.toSet());
results.addAll(metrics);
List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(modelId); List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(modelId);
if (CollectionUtils.isEmpty(matchedElements)) { if (CollectionUtils.isEmpty(matchedElements)) {
return new ArrayList<>(); return new ArrayList<>(results);
} }
Set<String> fieldNameList = matchedElements.stream() Set<String> fieldNameList = matchedElements.stream()
.filter(schemaElementMatch -> { .filter(schemaElementMatch -> {
@@ -423,7 +445,8 @@ public class LLMDslParser implements SemanticParser {
}) })
.filter(name -> StringUtils.isNotEmpty(name) && !name.contains("%")) .filter(name -> StringUtils.isNotEmpty(name) && !name.contains("%"))
.collect(Collectors.toSet()); .collect(Collectors.toSet());
return new ArrayList<>(fieldNameList); results.addAll(fieldNameList);
return new ArrayList<>(results);
} }
protected Map<Long, String> getItemIdToName(Long modelId, SemanticSchema semanticSchema) { protected Map<Long, String> getItemIdToName(Long modelId, SemanticSchema semanticSchema) {

View File

@@ -40,4 +40,5 @@ public class DefaultSemanticConfig {
@Value("${explain.path:/api/semantic/query/explain}") @Value("${explain.path:/api/semantic/query/explain}")
private String explainPath; private String explainPath;
} }

View File

@@ -81,8 +81,9 @@ public class LocalSemanticLayer extends BaseSemanticLayer {
public List<ModelSchemaResp> doFetchModelSchema(List<Long> ids) { public List<ModelSchemaResp> doFetchModelSchema(List<Long> ids) {
ModelSchemaFilterReq filter = new ModelSchemaFilterReq(); ModelSchemaFilterReq filter = new ModelSchemaFilterReq();
filter.setModelIds(ids); filter.setModelIds(ids);
modelService = ContextUtils.getBean(ModelService.class); schemaService = ContextUtils.getBean(SchemaService.class);
return modelService.fetchModelSchema(filter); User user = User.getFakeUser();
return schemaService.fetchModelSchema(filter, user);
} }
@Override @Override

View File

@@ -18,7 +18,7 @@ public class SchemaService {
public static final String ALL_CACHE = "all"; public static final String ALL_CACHE = "all";
private static final Integer META_CACHE_TIME = 5; private static final Integer META_CACHE_TIME = 2;
private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer(); private SemanticLayer semanticLayer = ComponentFactory.getSemanticLayer();
private LoadingCache<String, SemanticSchema> cache = CacheBuilder.newBuilder() private LoadingCache<String, SemanticSchema> cache = CacheBuilder.newBuilder()

View File

@@ -79,7 +79,7 @@ public class QueryStat {
return this; return this;
} }
public QueryStat setClassId(Long modelId) { public QueryStat setModelId(Long modelId) {
this.modelId = modelId; this.modelId = modelId;
return this; return this;
} }

View File

@@ -1,5 +1,6 @@
package com.tencent.supersonic.semantic.api.query.request; package com.tencent.supersonic.semantic.api.query.request;
import java.util.List;
import lombok.Data; import lombok.Data;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.ToString; import lombok.ToString;
@@ -11,6 +12,7 @@ public class ItemUseReq {
private String startTime; private String startTime;
private Long modelId; private Long modelId;
private List<Long> modelIds;
private Boolean cacheEnable = true; private Boolean cacheEnable = true;
private String metric; private String metric;
@@ -18,4 +20,8 @@ public class ItemUseReq {
this.startTime = startTime; this.startTime = startTime;
this.modelId = modelId; this.modelId = modelId;
} }
public ItemUseReq(String startTime, List<Long> modelIds) {
this.startTime = startTime;
this.modelIds = modelIds;
}
} }

View File

@@ -4,44 +4,43 @@ import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.auth.api.authentication.service.UserService; import com.tencent.supersonic.auth.api.authentication.service.UserService;
import com.tencent.supersonic.common.pojo.enums.AuthType;
import com.tencent.supersonic.common.util.BeanMapper; import com.tencent.supersonic.common.util.BeanMapper;
import com.tencent.supersonic.common.util.JsonUtil; import com.tencent.supersonic.common.util.JsonUtil;
import com.tencent.supersonic.common.pojo.enums.AuthType;
import com.tencent.supersonic.semantic.api.model.request.ModelReq; import com.tencent.supersonic.semantic.api.model.request.ModelReq;
import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq; import com.tencent.supersonic.semantic.api.model.request.ModelSchemaFilterReq;
import com.tencent.supersonic.semantic.api.model.response.DatabaseResp; import com.tencent.supersonic.semantic.api.model.response.DatabaseResp;
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.DatasourceResp; import com.tencent.supersonic.semantic.api.model.response.DatasourceResp;
import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
import com.tencent.supersonic.semantic.api.model.response.MetricSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.model.domain.DatabaseService; import com.tencent.supersonic.semantic.model.domain.DatabaseService;
import com.tencent.supersonic.semantic.model.domain.ModelService;
import com.tencent.supersonic.semantic.model.domain.DomainService;
import com.tencent.supersonic.semantic.model.domain.DimensionService;
import com.tencent.supersonic.semantic.model.domain.MetricService;
import com.tencent.supersonic.semantic.model.domain.DatasourceService; import com.tencent.supersonic.semantic.model.domain.DatasourceService;
import com.tencent.supersonic.semantic.model.domain.DimensionService;
import com.tencent.supersonic.semantic.model.domain.DomainService;
import com.tencent.supersonic.semantic.model.domain.MetricService;
import com.tencent.supersonic.semantic.model.domain.ModelService;
import com.tencent.supersonic.semantic.model.domain.dataobject.ModelDO; import com.tencent.supersonic.semantic.model.domain.dataobject.ModelDO;
import com.tencent.supersonic.semantic.model.domain.pojo.Model; import com.tencent.supersonic.semantic.model.domain.pojo.Model;
import com.tencent.supersonic.semantic.model.domain.repository.ModelRepository; import com.tencent.supersonic.semantic.model.domain.repository.ModelRepository;
import com.tencent.supersonic.semantic.model.domain.utils.ModelConvert; import com.tencent.supersonic.semantic.model.domain.utils.ModelConvert;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.BeanUtils; import org.springframework.beans.BeanUtils;
import org.springframework.context.annotation.Lazy; import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils; import org.springframework.util.CollectionUtils;
import java.util.List;
import java.util.Objects;
import java.util.Date;
import java.util.Set;
import java.util.Map;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.stream.Collectors;
@Slf4j @Slf4j
@Service @Service

View File

@@ -67,8 +67,11 @@ public class QueryServiceImpl implements QueryService {
@Override @Override
public Object queryBySql(QueryDslReq querySqlCmd, User user) throws Exception { public Object queryBySql(QueryDslReq querySqlCmd, User user) throws Exception {
statUtils.initStatInfo(querySqlCmd, user);
QueryStatement queryStatement = convertToQueryStatement(querySqlCmd, user); QueryStatement queryStatement = convertToQueryStatement(querySqlCmd, user);
return semanticQueryEngine.execute(queryStatement); QueryResultWithSchemaResp results = semanticQueryEngine.execute(queryStatement);
statUtils.statInfo2DbAsync(TaskStatusEnum.SUCCESS);
return results;
} }
private QueryStatement convertToQueryStatement(QueryDslReq querySqlCmd, User user) throws Exception { private QueryStatement convertToQueryStatement(QueryDslReq querySqlCmd, User user) throws Exception {

View File

@@ -55,7 +55,10 @@ public class SchemaServiceImpl implements SchemaService {
@Override @Override
public List<ModelSchemaResp> fetchModelSchema(ModelSchemaFilterReq filter, User user) { public List<ModelSchemaResp> fetchModelSchema(ModelSchemaFilterReq filter, User user) {
List<ModelSchemaResp> domainSchemaDescList = modelService.fetchModelSchema(filter); List<ModelSchemaResp> domainSchemaDescList = modelService.fetchModelSchema(filter);
List<ItemUseResp> statInfos = queryService.getStatInfo(new ItemUseReq()); ItemUseReq itemUseCommend = new ItemUseReq();
itemUseCommend.setModelIds(filter.getModelIds());
List<ItemUseResp> statInfos = queryService.getStatInfo(itemUseCommend);
log.debug("statInfos:{}", statInfos); log.debug("statInfos:{}", statInfos);
fillCnt(domainSchemaDescList, statInfos); fillCnt(domainSchemaDescList, statInfos);
return domainSchemaDescList; return domainSchemaDescList;

View File

@@ -4,22 +4,30 @@ import com.alibaba.ttl.TransmittableThreadLocal;
import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.tencent.supersonic.auth.api.authentication.pojo.User; import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
import com.tencent.supersonic.semantic.api.model.enums.QueryTypeBackEnum; import com.tencent.supersonic.semantic.api.model.enums.QueryTypeBackEnum;
import com.tencent.supersonic.semantic.api.model.enums.QueryTypeEnum; import com.tencent.supersonic.semantic.api.model.enums.QueryTypeEnum;
import com.tencent.supersonic.semantic.api.model.pojo.QueryStat; import com.tencent.supersonic.semantic.api.model.pojo.QueryStat;
import com.tencent.supersonic.semantic.api.model.pojo.SchemaItem;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.query.request.ItemUseReq; import com.tencent.supersonic.semantic.api.query.request.ItemUseReq;
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq; import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
import com.tencent.supersonic.semantic.api.query.response.ItemUseResp; import com.tencent.supersonic.semantic.api.query.response.ItemUseResp;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum; import com.tencent.supersonic.semantic.model.domain.ModelService;
import com.tencent.supersonic.semantic.query.persistence.repository.StatRepository; import com.tencent.supersonic.semantic.query.persistence.repository.StatRepository;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.codec.digest.DigestUtils;
import org.apache.logging.log4j.util.Strings; import org.apache.logging.log4j.util.Strings;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
@Component @Component
@Slf4j @Slf4j
@@ -28,13 +36,17 @@ public class StatUtils {
private static final TransmittableThreadLocal<QueryStat> STATS = new TransmittableThreadLocal<>(); private static final TransmittableThreadLocal<QueryStat> STATS = new TransmittableThreadLocal<>();
private final StatRepository statRepository; private final StatRepository statRepository;
private final SqlFilterUtils sqlFilterUtils; private final SqlFilterUtils sqlFilterUtils;
private final ModelService modelService;
private final ObjectMapper objectMapper = new ObjectMapper(); private final ObjectMapper objectMapper = new ObjectMapper();
public StatUtils(StatRepository statRepository, public StatUtils(StatRepository statRepository,
SqlFilterUtils sqlFilterUtils) { SqlFilterUtils sqlFilterUtils,
ModelService modelService) {
this.statRepository = statRepository; this.statRepository = statRepository;
this.sqlFilterUtils = sqlFilterUtils; this.sqlFilterUtils = sqlFilterUtils;
this.modelService = modelService;
} }
public static QueryStat get() { public static QueryStat get() {
@@ -69,6 +81,44 @@ public class StatUtils {
return true; return true;
} }
public void initStatInfo(QueryDslReq queryDslReq, User facadeUser) {
QueryStat queryStatInfo = new QueryStat();
List<String> allFields = SqlParserSelectHelper.getAllFields(queryDslReq.getSql());
queryStatInfo.setModelId(queryDslReq.getModelId());
ModelSchemaResp modelSchemaResp = modelService.fetchSingleModelSchema(queryDslReq.getModelId());
List<String> dimensions = new ArrayList<>();
if (Objects.nonNull(modelSchemaResp)) {
dimensions = getFieldNames(allFields, modelSchemaResp.getDimensions());
}
List<String> metrics = new ArrayList<>();
if (Objects.nonNull(modelSchemaResp)) {
metrics = getFieldNames(allFields, modelSchemaResp.getMetrics());
}
String userName = getUserName(facadeUser);
try {
queryStatInfo.setTraceId("")
.setModelId(queryDslReq.getModelId())
.setUser(userName)
.setQueryType(QueryTypeEnum.SQL.getValue())
.setQueryTypeBack(QueryTypeBackEnum.NORMAL.getState())
.setQuerySqlCmd(queryDslReq.toString())
.setQuerySqlCmdMd5(DigestUtils.md5Hex(queryDslReq.toString()))
.setStartTime(System.currentTimeMillis())
.setUseResultCache(true)
.setUseSqlCache(true)
.setMetrics(objectMapper.writeValueAsString(metrics))
.setDimensions(objectMapper.writeValueAsString(dimensions));
} catch (JsonProcessingException e) {
log.error("initStatInfo:{}", e);
}
StatUtils.set(queryStatInfo);
}
public void initStatInfo(QueryStructReq queryStructCmd, User facadeUser) { public void initStatInfo(QueryStructReq queryStructCmd, User facadeUser) {
QueryStat queryStatInfo = new QueryStat(); QueryStat queryStatInfo = new QueryStat();
String traceId = ""; String traceId = "";
@@ -76,12 +126,11 @@ public class StatUtils {
List<String> metrics = new ArrayList<>(); List<String> metrics = new ArrayList<>();
queryStructCmd.getAggregators().stream().forEach(aggregator -> metrics.add(aggregator.getColumn())); queryStructCmd.getAggregators().stream().forEach(aggregator -> metrics.add(aggregator.getColumn()));
String user = (Objects.nonNull(facadeUser) && Strings.isNotEmpty(facadeUser.getName())) ? facadeUser.getName() String user = getUserName(facadeUser);
: "Admin";
try { try {
queryStatInfo.setTraceId(traceId) queryStatInfo.setTraceId(traceId)
.setClassId(queryStructCmd.getModelId()) .setModelId(queryStructCmd.getModelId())
.setUser(user) .setUser(user)
.setQueryType(QueryTypeEnum.STRUCT.getValue()) .setQueryType(QueryTypeEnum.STRUCT.getValue())
.setQueryTypeBack(QueryTypeBackEnum.NORMAL.getState()) .setQueryTypeBack(QueryTypeBackEnum.NORMAL.getState())
@@ -105,6 +154,25 @@ public class StatUtils {
} }
private List<String> getFieldNames(List<String> allFields, List<? extends SchemaItem> schemaItems) {
Set<String> fieldNames = schemaItems
.stream()
.map(dimSchemaResp -> dimSchemaResp.getBizName())
.collect(Collectors.toSet());
if (!CollectionUtils.isEmpty(fieldNames)) {
return allFields.stream().filter(fieldName -> fieldNames.contains(fieldName))
.collect(Collectors.toList());
}
return new ArrayList<>();
}
private String getUserName(User facadeUser) {
return (Objects.nonNull(facadeUser) && Strings.isNotEmpty(facadeUser.getName())) ? facadeUser.getName()
: "Admin";
}
public List<ItemUseResp> getStatInfo(ItemUseReq itemUseCommend) { public List<ItemUseResp> getStatInfo(ItemUseReq itemUseCommend) {
return statRepository.getStatInfo(itemUseCommend); return statRepository.getStatInfo(itemUseCommend);
} }

View File

@@ -64,6 +64,12 @@
<if test="modelId != null"> <if test="modelId != null">
and model_id = #{modelId} and model_id = #{modelId}
</if> </if>
<if test="modelIds != null and modelIds.size() > 0">
and model_id in
<foreach item="id" collection="modelIds" open="(" separator="," close=")">
#{id}
</foreach>
</if>
<if test="metric != null"> <if test="metric != null">
and metrics like concat('%',#{metric},'%') and metrics like concat('%',#{metric},'%')
</if> </if>