(improvement)(Headless) Migrate the map interface to headless, and change agentId to dataSetIds. (#899)

lexluo09
2024-04-09 10:58:40 +08:00
committed by GitHub
parent a29ba2257a
commit 43b9d5545d
14 changed files with 176 additions and 88 deletions
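A minimal sketch of the new request shape, assuming Lombok-generated setters on QueryMapReq for the fields this commit reads (dataSetNames, topN) and an assumed queryText field copied into QueryReq by BeanUtils.copyProperties; none of these names beyond the diff are confirmed:

// Sketch only, not from this commit: callers now scope the map request by data sets
// instead of an agentId. Setter names are inferred from the getters used in
// MetaDiscoveryServiceImpl; queryText is an assumed field name.
import java.util.Arrays;
import com.tencent.supersonic.headless.api.pojo.request.QueryMapReq;
import com.tencent.supersonic.headless.api.pojo.response.MapInfoResp;
import com.tencent.supersonic.headless.server.service.MetaDiscoveryService;

public class MapRequestSketch {
    // Hypothetical helper illustrating the new data-set-scoped request.
    public static MapInfoResp requestMapMeta(MetaDiscoveryService metaDiscoveryService) {
        QueryMapReq queryMapReq = new QueryMapReq();
        queryMapReq.setQueryText("total sales by region last week"); // assumed field name
        queryMapReq.setDataSetNames(Arrays.asList("sales"));         // replaces the former agentId scoping
        queryMapReq.setTopN(10);
        return metaDiscoveryService.getMapMeta(queryMapReq);
    }
}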

View File

@@ -33,6 +33,8 @@ public class MetaFilter {
private List<Long> ids;
private List<String> names;
private List<String> fieldsDepend;
private Integer isTag;

View File

@@ -2,59 +2,20 @@ package com.tencent.supersonic.headless.server.pojo;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
@Data
public class ModelCluster {
private static final String split = "_";
private Set<Long> modelIds = new LinkedHashSet<>();
private Set<String> modelNames = new LinkedHashSet<>();
private String key;
private String name;
public static ModelCluster build(Set<Long> modelIds) {
ModelCluster modelCluster = new ModelCluster();
modelCluster.setModelIds(modelIds);
modelCluster.setKey(StringUtils.join(modelIds, split));
return modelCluster;
}
public static ModelCluster build(String key) {
ModelCluster modelCluster = new ModelCluster();
modelCluster.setModelIds(getModelIdFromKey(key));
modelCluster.setKey(key);
return modelCluster;
}
public void buildName(Map<Long, String> modelNameMap) {
modelNames = modelNameMap.entrySet().stream().filter(entry ->
modelIds.contains(entry.getKey())).map(Map.Entry::getValue)
.collect(Collectors.toSet());
name = String.join(split, modelNames);
}
public static Set<Long> getModelIdFromKey(String key) {
return Arrays.stream(key.split(split))
.map(Long::parseLong).collect(Collectors.toSet());
}
public Long getFirstModel() {
if (CollectionUtils.isEmpty(modelIds)) {
return -1L;
}
return new ArrayList<>(modelIds).get(0);
}
}

View File

@@ -0,0 +1,33 @@
package com.tencent.supersonic.headless.server.rest.api;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.auth.api.authentication.utils.UserHolder;
import com.tencent.supersonic.headless.api.pojo.request.QueryMapReq;
import com.tencent.supersonic.headless.server.service.MetaDiscoveryService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
@RestController
@RequestMapping("/api/semantic/query")
@Slf4j
public class MetaDiscoveryApiController {
@Autowired
private MetaDiscoveryService metaDiscovery;
@PostMapping("map")
public Object map(@RequestBody QueryMapReq queryMapReq,
HttpServletRequest request, HttpServletResponse response) throws Exception {
User user = UserHolder.findUser(request, response);
queryMapReq.setUser(user);
return metaDiscovery.getMapMeta(queryMapReq);
}
}
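A hedged HTTP-level sketch of calling this controller (not part of the commit): the path combines @RequestMapping("/api/semantic/query") with @PostMapping("map"); host, port, and the queryText field name are assumptions.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.springframework.web.client.RestTemplate;

public class MapEndpointClientSketch {
    public static void main(String[] args) {
        // Request body mirrors QueryMapReq: dataSetNames and topN appear in this commit,
        // queryText is an assumed field name.
        Map<String, Object> body = new HashMap<>();
        body.put("queryText", "total sales by region last week");
        body.put("dataSetNames", Collections.singletonList("sales"));
        body.put("topN", 10);
        // Path = @RequestMapping("/api/semantic/query") + @PostMapping("map"); host/port assumed.
        Object mapInfo = new RestTemplate().postForObject(
                "http://localhost:8080/api/semantic/query/map", body, Object.class);
        System.out.println(mapInfo);
    }
}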

View File

@@ -30,6 +30,10 @@ public interface DataSetService {
List<DataSetResp> getDataSets(String dataSetName, User user);
List<DataSetResp> getDataSets(List<String> dataSetNames, User user);
Map<Long, String> getDataSetIdToNameMap(List<Long> dataSetIds);
List<DataSetResp> getDataSetsInheritAuth(User user, Long domainId);
SemanticQueryReq convert(QueryDataSetReq queryDataSetReq);

View File

@@ -0,0 +1,10 @@
package com.tencent.supersonic.headless.server.service;
import com.tencent.supersonic.headless.api.pojo.request.QueryMapReq;
import com.tencent.supersonic.headless.api.pojo.response.MapInfoResp;
public interface MetaDiscoveryService {
MapInfoResp getMapMeta(QueryMapReq queryMapReq);
}

View File

@@ -38,17 +38,17 @@ import com.tencent.supersonic.headless.api.pojo.response.QueryResult;
import com.tencent.supersonic.headless.api.pojo.response.QueryState;
import com.tencent.supersonic.headless.api.pojo.response.SemanticQueryResp;
import com.tencent.supersonic.headless.core.chat.corrector.SemanticCorrector;
import com.tencent.supersonic.headless.core.chat.knowledge.HanlpMapResult;
import com.tencent.supersonic.headless.core.chat.knowledge.KnowledgeService;
import com.tencent.supersonic.headless.core.chat.knowledge.SearchService;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.HanlpHelper;
import com.tencent.supersonic.headless.core.chat.knowledge.helper.NatureHelper;
import com.tencent.supersonic.headless.core.chat.mapper.SchemaMapper;
import com.tencent.supersonic.headless.core.chat.parser.SemanticParser;
import com.tencent.supersonic.headless.core.chat.query.QueryManager;
import com.tencent.supersonic.headless.core.chat.query.SemanticQuery;
import com.tencent.supersonic.headless.core.chat.query.llm.s2sql.LLMSqlQuery;
import com.tencent.supersonic.headless.core.chat.query.rule.RuleSemanticQuery;
import com.tencent.supersonic.headless.core.pojo.ChatContext;
import com.tencent.supersonic.headless.core.pojo.QueryContext;
import com.tencent.supersonic.headless.server.persistence.dataobject.StatisticsDO;

View File

@@ -131,18 +131,38 @@ public class DataSetServiceImpl
@Override
public List<DataSetResp> getDataSets(User user) {
MetaFilter metaFilter = new MetaFilter();
return getDataSetsByAuth(user, metaFilter);
}
@Override
public List<DataSetResp> getDataSets(String dataSetName, User user) {
MetaFilter metaFilter = new MetaFilter();
metaFilter.setName(dataSetName);
return getDataSetsByAuth(user, metaFilter);
}
@Override
public List<DataSetResp> getDataSets(List<String> dataSetNames, User user) {
MetaFilter metaFilter = new MetaFilter();
metaFilter.setNames(dataSetNames);
return getDataSetsByAuth(user, metaFilter);
}
private List<DataSetResp> getDataSetsByAuth(User user, MetaFilter metaFilter) {
List<DataSetResp> dataSetResps = getDataSetList(metaFilter, user);
return getDataSetFilterByAuth(dataSetResps, user);
}
@Override
public Map<Long, String> getDataSetIdToNameMap(List<Long> dataSetIds) {
MetaFilter metaFilter = new MetaFilter();
metaFilter.setIds(dataSetIds);
List<DataSetResp> dataSetResps = getDataSetList(metaFilter, User.getFakeUser());
return dataSetResps.stream().collect(
Collectors.toMap(DataSetResp::getId, DataSetResp::getName, (k1, k2) -> k1));
}
@Override
public List<DataSetResp> getDataSetsInheritAuth(User user, Long domainId) {
List<DataSetResp> dataSetResps = getDataSetList(new MetaFilter(), user);

View File

@@ -0,0 +1,138 @@
package com.tencent.supersonic.headless.server.service.impl;
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
import com.tencent.supersonic.headless.api.pojo.SchemaMapInfo;
import com.tencent.supersonic.headless.api.pojo.SemanticSchema;
import com.tencent.supersonic.headless.api.pojo.request.QueryMapReq;
import com.tencent.supersonic.headless.api.pojo.request.QueryReq;
import com.tencent.supersonic.headless.api.pojo.response.DataSetResp;
import com.tencent.supersonic.headless.api.pojo.response.MapInfoResp;
import com.tencent.supersonic.headless.api.pojo.response.MapResp;
import com.tencent.supersonic.headless.core.chat.knowledge.builder.BaseWordBuilder;
import com.tencent.supersonic.headless.server.service.ChatQueryService;
import com.tencent.supersonic.headless.server.service.DataSetService;
import com.tencent.supersonic.headless.server.service.MetaDiscoveryService;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
@Service
public class MetaDiscoveryServiceImpl implements MetaDiscoveryService {
@Autowired
private DataSetService dataSetService;
@Autowired
private ChatQueryService chatQueryService;
@Autowired
private SemanticService semanticService;
@Override
public MapInfoResp getMapMeta(QueryMapReq queryMapReq) {
QueryReq queryReq = new QueryReq();
BeanUtils.copyProperties(queryMapReq, queryReq);
List<DataSetResp> dataSets = dataSetService.getDataSets(queryMapReq.getDataSetNames(), queryMapReq.getUser());
Set<Long> dataSetIds = dataSets.stream().map(dataSetResp -> dataSetResp.getId()).collect(Collectors.toSet());
queryReq.setDataSetIds(dataSetIds);
MapResp mapResp = chatQueryService.performMapping(queryReq);
return convert(mapResp, queryMapReq.getTopN());
}
public MapInfoResp convert(MapResp mapResp, Integer topN) {
MapInfoResp mapInfoResp = new MapInfoResp();
if (Objects.isNull(mapResp)) {
return mapInfoResp;
}
BeanUtils.copyProperties(mapResp, mapInfoResp);
Set<Long> dataSetIds = mapResp.getMapInfo().getDataSetElementMatches().keySet();
Map<Long, String> dataSetMap = dataSetService.getDataSetIdToNameMap(new ArrayList<>(dataSetIds));
mapInfoResp.setMapFields(getMapFields(mapResp.getMapInfo(), dataSetMap));
mapInfoResp.setTopFields(getTopFields(topN, mapResp.getMapInfo(), dataSetMap));
return mapInfoResp;
}
private Map<String, List<SchemaElementMatch>> getMapFields(SchemaMapInfo mapInfo,
Map<Long, String> dataSetMap) {
Map<String, List<SchemaElementMatch>> result = new HashMap<>();
for (Map.Entry<Long, List<SchemaElementMatch>> entry : mapInfo.getDataSetElementMatches().entrySet()) {
List<SchemaElementMatch> values = entry.getValue();
if (CollectionUtils.isNotEmpty(values) && dataSetMap.containsKey(entry.getKey())) {
result.put(dataSetMap.get(entry.getKey()), values);
}
}
return result;
}
private Map<String, List<SchemaElementMatch>> getTopFields(Integer topN,
SchemaMapInfo mapInfo,
Map<Long, String> dataSetMap) {
Set<Long> dataSetIds = mapInfo.getDataSetElementMatches().keySet();
Map<String, List<SchemaElementMatch>> result = new HashMap<>();
SemanticSchema semanticSchema = semanticService.getSemanticSchema();
for (Long dataSetId : dataSetIds) {
String dataSetName = dataSetMap.get(dataSetId);
if (StringUtils.isBlank(dataSetName)) {
continue;
}
// top (topN - 1) dimensions ranked by useCnt; the day time dimension is appended below
Set<SchemaElementMatch> dimensions = semanticSchema.getDimensions(dataSetId)
.stream().sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.limit(topN - 1).map(mergeFunction()).collect(Collectors.toSet());
SchemaElementMatch timeDimensionMatch = getTimeDimension(dataSetId, dataSetName);
dimensions.add(timeDimensionMatch);
// topN metrics ranked by useCnt
Set<SchemaElementMatch> metrics = semanticSchema.getMetrics(dataSetId)
.stream().sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
.limit(topN).map(mergeFunction()).collect(Collectors.toSet());
dimensions.addAll(metrics);
result.put(dataSetName, new ArrayList<>(dimensions));
}
return result;
}
/**
* Builds a SchemaElementMatch for the default time dimension (day).
* @param dataSetId id of the data set the time dimension belongs to
* @param dataSetName display name of the data set
* @return a SchemaElementMatch wrapping the day time dimension
*/
private SchemaElementMatch getTimeDimension(Long dataSetId, String dataSetName) {
SchemaElement element = SchemaElement.builder().dataSet(dataSetId).dataSetName(dataSetName)
.type(SchemaElementType.DIMENSION).bizName(TimeDimensionEnum.DAY.getName()).build();
SchemaElementMatch timeDimensionMatch = SchemaElementMatch.builder().element(element)
.detectWord(TimeDimensionEnum.DAY.getChName()).word(TimeDimensionEnum.DAY.getChName())
.similarity(1L).frequency(BaseWordBuilder.DEFAULT_FREQUENCY).build();
return timeDimensionMatch;
}
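// Wraps a SchemaElement as a SchemaElementMatch with default frequency and full similarity.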
private Function<SchemaElement, SchemaElementMatch> mergeFunction() {
return schemaElement -> SchemaElementMatch.builder().element(schemaElement)
.frequency(BaseWordBuilder.DEFAULT_FREQUENCY).word(schemaElement.getName()).similarity(1)
.detectWord(schemaElement.getName()).build();
}
}
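A hedged sketch of consuming the converted result (not in the diff): both maps are keyed by data set name; the getters on MapInfoResp and SchemaElementMatch are assumed to mirror the setters and builder fields used above.

import java.util.List;
import java.util.Map;
import com.tencent.supersonic.headless.api.pojo.SchemaElementMatch;
import com.tencent.supersonic.headless.api.pojo.response.MapInfoResp;

public class MapInfoRespSketch {
    // Prints the topN fields per data set; accessor names are assumptions.
    public static void printTopFields(MapInfoResp mapInfo) {
        for (Map.Entry<String, List<SchemaElementMatch>> entry : mapInfo.getTopFields().entrySet()) {
            for (SchemaElementMatch match : entry.getValue()) {
                System.out.println(entry.getKey() + " -> " + match.getWord());
            }
        }
    }
}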