mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
(improvement)(headless) Headless integration embedding functionality, with support for viewId in embeddings. (#725)
This commit is contained in:
@@ -44,7 +44,7 @@ public abstract class BaseMatchStrategy<T> implements MatchStrategy<T> {
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<T> detect(QueryContext queryContext, List<S2Term> terms, Set<Long> detectModelIds) {
|
||||
public List<T> detect(QueryContext queryContext, List<S2Term> terms, Set<Long> detectViewIds) {
|
||||
Map<Integer, Integer> regOffsetToLength = getRegOffsetToLength(terms);
|
||||
String text = queryContext.getQueryText();
|
||||
Set<T> results = new HashSet<>();
|
||||
@@ -59,16 +59,16 @@ public abstract class BaseMatchStrategy<T> implements MatchStrategy<T> {
|
||||
if (index <= text.length()) {
|
||||
String detectSegment = text.substring(startIndex, index);
|
||||
detectSegments.add(detectSegment);
|
||||
detectByStep(queryContext, results, detectModelIds, startIndex, index, offset);
|
||||
detectByStep(queryContext, results, detectViewIds, startIndex, index, offset);
|
||||
}
|
||||
}
|
||||
startIndex = mapperHelper.getStepIndex(regOffsetToLength, startIndex);
|
||||
}
|
||||
detectByBatch(queryContext, results, detectModelIds, detectSegments);
|
||||
detectByBatch(queryContext, results, detectViewIds, detectSegments);
|
||||
return new ArrayList<>(results);
|
||||
}
|
||||
|
||||
protected void detectByBatch(QueryContext queryContext, Set<T> results, Set<Long> detectModelIds,
|
||||
protected void detectByBatch(QueryContext queryContext, Set<T> results, Set<Long> detectViewIds,
|
||||
Set<String> detectSegments) {
|
||||
return;
|
||||
}
|
||||
@@ -152,6 +152,6 @@ public abstract class BaseMatchStrategy<T> implements MatchStrategy<T> {
|
||||
public abstract String getMapKey(T a);
|
||||
|
||||
public abstract void detectByStep(QueryContext queryContext, Set<T> results,
|
||||
Set<Long> detectModelIds, Integer startIndex, Integer index, int offset);
|
||||
Set<Long> detectViewIds, Integer startIndex, Integer index, int offset);
|
||||
|
||||
}
|
||||
|
||||
@@ -37,9 +37,9 @@ public class DatabaseMatchStrategy extends BaseMatchStrategy<DatabaseMapResult>
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<DatabaseMapResult>> match(QueryContext queryContext, List<S2Term> terms,
|
||||
Set<Long> detectModelIds) {
|
||||
Set<Long> detectViewIds) {
|
||||
this.allElements = getSchemaElements(queryContext);
|
||||
return super.match(queryContext, terms, detectModelIds);
|
||||
return super.match(queryContext, terms, detectViewIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -54,7 +54,7 @@ public class DatabaseMatchStrategy extends BaseMatchStrategy<DatabaseMapResult>
|
||||
+ Constants.UNDERLINE + a.getSchemaElement().getName();
|
||||
}
|
||||
|
||||
public void detectByStep(QueryContext queryContext, Set<DatabaseMapResult> existResults, Set<Long> detectModelIds,
|
||||
public void detectByStep(QueryContext queryContext, Set<DatabaseMapResult> existResults, Set<Long> detectViewIds,
|
||||
Integer startIndex, Integer index, int offset) {
|
||||
String detectSegment = queryContext.getQueryText().substring(startIndex, index);
|
||||
if (StringUtils.isBlank(detectSegment)) {
|
||||
|
||||
@@ -1,20 +1,18 @@
|
||||
package com.tencent.supersonic.chat.core.mapper;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.tencent.supersonic.chat.core.pojo.QueryContext;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.core.pojo.QueryContext;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.embedding.Retrieval;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.core.knowledge.EmbeddingResult;
|
||||
import com.tencent.supersonic.headless.core.knowledge.builder.BaseWordBuilder;
|
||||
import com.tencent.supersonic.headless.core.knowledge.helper.HanlpHelper;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.embedding.Retrieval;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/***
|
||||
* A mapper that recognizes schema elements with vector embedding.
|
||||
@@ -39,15 +37,11 @@ public class EmbeddingMapper extends BaseMapper {
|
||||
|
||||
SchemaElement schemaElement = JSONObject.parseObject(JSONObject.toJSONString(matchResult.getMetadata()),
|
||||
SchemaElement.class);
|
||||
if (Objects.isNull(matchResult.getMetadata())) {
|
||||
Long viewId = Retrieval.getLongId(matchResult.getMetadata().get("viewId"));
|
||||
if (Objects.isNull(viewId)) {
|
||||
continue;
|
||||
}
|
||||
String modelIdStr = matchResult.getMetadata().get("modelId");
|
||||
if (StringUtils.isBlank(modelIdStr)) {
|
||||
continue;
|
||||
}
|
||||
long modelId = Long.parseLong(modelIdStr);
|
||||
schemaElement = getSchemaElement(modelId, schemaElement.getType(), elementId,
|
||||
schemaElement = getSchemaElement(viewId, schemaElement.getType(), elementId,
|
||||
queryContext.getSemanticSchema());
|
||||
if (schemaElement == null) {
|
||||
continue;
|
||||
@@ -60,7 +54,7 @@ public class EmbeddingMapper extends BaseMapper {
|
||||
.detectWord(matchResult.getDetectWord())
|
||||
.build();
|
||||
//3. add to mapInfo
|
||||
addToSchemaMap(queryContext.getMapInfo(), modelId, schemaElementMatch);
|
||||
addToSchemaMap(queryContext.getMapInfo(), viewId, schemaElementMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,15 +4,13 @@ import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.chat.core.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.headless.core.knowledge.EmbeddingResult;
|
||||
import com.tencent.supersonic.chat.core.pojo.QueryContext;
|
||||
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.util.ComponentFactory;
|
||||
import com.tencent.supersonic.common.util.embedding.Retrieval;
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQuery;
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQueryResult;
|
||||
import com.tencent.supersonic.common.util.embedding.S2EmbeddingStore;
|
||||
import com.tencent.supersonic.headless.server.service.MetaEmbeddingService;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
@@ -36,9 +34,7 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
|
||||
private OptimizationConfig optimizationConfig;
|
||||
|
||||
@Autowired
|
||||
private EmbeddingConfig embeddingConfig;
|
||||
|
||||
private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore();
|
||||
private MetaEmbeddingService metaEmbeddingService;
|
||||
|
||||
@Override
|
||||
public boolean needDelete(EmbeddingResult oneRoundResult, EmbeddingResult existResult) {
|
||||
@@ -52,7 +48,7 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void detectByBatch(QueryContext queryContext, Set<EmbeddingResult> results, Set<Long> detectModelIds,
|
||||
protected void detectByBatch(QueryContext queryContext, Set<EmbeddingResult> results, Set<Long> detectViewIds,
|
||||
Set<String> detectSegments) {
|
||||
|
||||
List<String> queryTextsList = detectSegments.stream()
|
||||
@@ -66,51 +62,29 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
|
||||
optimizationConfig.getEmbeddingMapperBatch());
|
||||
|
||||
for (List<String> queryTextsSub : queryTextsSubList) {
|
||||
detectByQueryTextsSub(results, detectModelIds, queryTextsSub);
|
||||
detectByQueryTextsSub(results, detectViewIds, queryTextsSub);
|
||||
}
|
||||
}
|
||||
|
||||
private void detectByQueryTextsSub(Set<EmbeddingResult> results, Set<Long> detectModelIds,
|
||||
private void detectByQueryTextsSub(Set<EmbeddingResult> results, Set<Long> detectViewIds,
|
||||
List<String> queryTextsSub) {
|
||||
int embeddingNumber = optimizationConfig.getEmbeddingMapperNumber();
|
||||
Double distance = optimizationConfig.getEmbeddingMapperDistanceThreshold();
|
||||
Map<String, String> filterCondition = null;
|
||||
// step1. build query params
|
||||
// if only one modelId, add to filterCondition
|
||||
if (CollectionUtils.isNotEmpty(detectModelIds) && detectModelIds.size() == 1) {
|
||||
filterCondition = new HashMap<>();
|
||||
filterCondition.put("modelId", detectModelIds.stream().findFirst().get().toString());
|
||||
}
|
||||
|
||||
RetrieveQuery retrieveQuery = RetrieveQuery.builder()
|
||||
.queryTextsList(queryTextsSub)
|
||||
.filterCondition(filterCondition)
|
||||
.queryEmbeddings(null)
|
||||
.build();
|
||||
RetrieveQuery retrieveQuery = RetrieveQuery.builder().queryTextsList(queryTextsSub).build();
|
||||
// step2. retrieveQuery by detectSegment
|
||||
List<RetrieveQueryResult> retrieveQueryResults = s2EmbeddingStore.retrieveQuery(
|
||||
embeddingConfig.getMetaCollectionName(), retrieveQuery, embeddingNumber);
|
||||
List<RetrieveQueryResult> retrieveQueryResults = metaEmbeddingService.retrieveQuery(
|
||||
new ArrayList<>(detectViewIds), retrieveQuery, embeddingNumber);
|
||||
|
||||
if (CollectionUtils.isEmpty(retrieveQueryResults)) {
|
||||
return;
|
||||
}
|
||||
// step3. build EmbeddingResults. filter by modelId
|
||||
// step3. build EmbeddingResults
|
||||
List<EmbeddingResult> collect = retrieveQueryResults.stream()
|
||||
.map(retrieveQueryResult -> {
|
||||
List<Retrieval> retrievals = retrieveQueryResult.getRetrieval();
|
||||
if (CollectionUtils.isNotEmpty(retrievals)) {
|
||||
retrievals.removeIf(retrieval -> retrieval.getDistance() > distance.doubleValue());
|
||||
if (CollectionUtils.isNotEmpty(detectModelIds)) {
|
||||
retrievals.removeIf(retrieval -> {
|
||||
String modelIdStr = retrieval.getMetadata().get("modelId").toString();
|
||||
if (StringUtils.isBlank(modelIdStr)) {
|
||||
return true;
|
||||
}
|
||||
//return detectModelIds.contains(Long.parseLong(modelIdStr));
|
||||
Double modelId = Double.parseDouble(modelIdStr);
|
||||
return detectModelIds.contains(modelId.longValue());
|
||||
});
|
||||
}
|
||||
}
|
||||
return retrieveQueryResult;
|
||||
})
|
||||
@@ -121,6 +95,9 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
|
||||
BeanUtils.copyProperties(retrieval, embeddingResult);
|
||||
embeddingResult.setDetectWord(retrieveQueryResult.getQuery());
|
||||
embeddingResult.setName(retrieval.getQuery());
|
||||
Map<String, String> convertedMap = retrieval.getMetadata().entrySet().stream()
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().toString()));
|
||||
embeddingResult.setMetadata(convertedMap);
|
||||
return embeddingResult;
|
||||
}))
|
||||
.collect(Collectors.toList());
|
||||
@@ -135,7 +112,7 @@ public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void detectByStep(QueryContext queryContext, Set<EmbeddingResult> existResults, Set<Long> detectModelIds,
|
||||
public void detectByStep(QueryContext queryContext, Set<EmbeddingResult> existResults, Set<Long> detectViewIds,
|
||||
Integer startIndex, Integer index, int offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -36,15 +36,15 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<HanlpMapResult>> match(QueryContext queryContext, List<S2Term> terms,
|
||||
Set<Long> detectModelIds) {
|
||||
Set<Long> detectViewIds) {
|
||||
String text = queryContext.getQueryText();
|
||||
if (Objects.isNull(terms) || StringUtils.isEmpty(text)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("retryCount:{},terms:{},,detectModelIds:{}", terms, detectModelIds);
|
||||
log.debug("retryCount:{},terms:{},,detectModelIds:{}", terms, detectViewIds);
|
||||
|
||||
List<HanlpMapResult> detects = detect(queryContext, terms, detectModelIds);
|
||||
List<HanlpMapResult> detects = detect(queryContext, terms, detectViewIds);
|
||||
Map<MatchText, List<HanlpMapResult>> result = new HashMap<>();
|
||||
|
||||
result.put(MatchText.builder().regText(text).detectSegment(text).build(), detects);
|
||||
@@ -57,7 +57,7 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
&& existResult.getDetectWord().length() < oneRoundResult.getDetectWord().length();
|
||||
}
|
||||
|
||||
public void detectByStep(QueryContext queryContext, Set<HanlpMapResult> existResults, Set<Long> detectModelIds,
|
||||
public void detectByStep(QueryContext queryContext, Set<HanlpMapResult> existResults, Set<Long> detectViewIds,
|
||||
Integer startIndex, Integer index, int offset) {
|
||||
String text = queryContext.getQueryText();
|
||||
String detectSegment = text.substring(startIndex, index);
|
||||
@@ -65,11 +65,10 @@ public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
// step1. pre search
|
||||
Integer oneDetectionMaxSize = optimizationConfig.getOneDetectionMaxSize();
|
||||
LinkedHashSet<HanlpMapResult> hanlpMapResults = SearchService.prefixSearch(detectSegment, oneDetectionMaxSize,
|
||||
detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
detectViewIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
// step2. suffix search
|
||||
LinkedHashSet<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(detectSegment,
|
||||
oneDetectionMaxSize, detectModelIds).stream()
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
oneDetectionMaxSize, detectViewIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
hanlpMapResults.addAll(suffixHanlpMapResults);
|
||||
|
||||
|
||||
@@ -13,6 +13,6 @@ import java.util.Set;
|
||||
*/
|
||||
public interface MatchStrategy<T> {
|
||||
|
||||
Map<MatchText, List<T>> match(QueryContext queryContext, List<S2Term> terms, Set<Long> detectModelId);
|
||||
Map<MatchText, List<T>> match(QueryContext queryContext, List<S2Term> terms, Set<Long> detectViewIds);
|
||||
|
||||
}
|
||||
@@ -27,7 +27,7 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<HanlpMapResult>> match(QueryContext queryContext, List<S2Term> originals,
|
||||
Set<Long> detectModelIds) {
|
||||
Set<Long> detectViewIds) {
|
||||
String text = queryContext.getQueryText();
|
||||
Map<Integer, Integer> regOffsetToLength = getRegOffsetToLength(originals);
|
||||
|
||||
@@ -52,9 +52,9 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
|
||||
if (StringUtils.isNotEmpty(detectSegment)) {
|
||||
List<HanlpMapResult> hanlpMapResults = SearchService.prefixSearch(detectSegment,
|
||||
SearchService.SEARCH_SIZE, detectModelIds);
|
||||
SearchService.SEARCH_SIZE, detectViewIds);
|
||||
List<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(
|
||||
detectSegment, SEARCH_SIZE, detectModelIds);
|
||||
detectSegment, SEARCH_SIZE, detectViewIds);
|
||||
hanlpMapResults.addAll(suffixHanlpMapResults);
|
||||
// remove entity name where search
|
||||
hanlpMapResults = hanlpMapResults.stream().filter(entry -> {
|
||||
@@ -88,7 +88,7 @@ public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void detectByStep(QueryContext queryContext, Set<HanlpMapResult> results, Set<Long> detectModelIds,
|
||||
public void detectByStep(QueryContext queryContext, Set<HanlpMapResult> results, Set<Long> detectViewIds,
|
||||
Integer startIndex,
|
||||
Integer i, int offset) {
|
||||
|
||||
|
||||
@@ -97,12 +97,12 @@ public class SearchServiceImpl implements SearchService {
|
||||
List<S2Term> originals = knowledgeService.getTerms(queryText);
|
||||
log.info("hanlp parse result: {}", originals);
|
||||
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
||||
Set<Long> detectModelIds = mapperHelper.getViewIds(queryReq.getModelId(), agentService.getAgent(agentId));
|
||||
Set<Long> detectViewIds = mapperHelper.getViewIds(queryReq.getModelId(), agentService.getAgent(agentId));
|
||||
|
||||
QueryContext queryContext = new QueryContext();
|
||||
BeanUtils.copyProperties(queryReq, queryContext);
|
||||
Map<MatchText, List<HanlpMapResult>> regTextMap =
|
||||
searchMatchStrategy.match(queryContext, originals, detectModelIds);
|
||||
searchMatchStrategy.match(queryContext, originals, detectViewIds);
|
||||
regTextMap.entrySet().stream().forEach(m -> HanlpHelper.transLetterOriginal(m.getValue()));
|
||||
|
||||
// 4.get the most matching data
|
||||
|
||||
@@ -18,7 +18,7 @@ public class DataItem {
|
||||
|
||||
private TypeEnums type;
|
||||
|
||||
private Long modelId;
|
||||
private String modelId;
|
||||
|
||||
private String defaultAgg;
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
package com.tencent.supersonic.common.util.embedding;
|
||||
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import lombok.Data;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import java.util.Map;
|
||||
import lombok.Data;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
@Data
|
||||
@@ -17,11 +17,30 @@ public class Retrieval {
|
||||
|
||||
protected Map<String, Object> metadata;
|
||||
|
||||
public static Long getLongId(String id) {
|
||||
if (StringUtils.isBlank(id)) {
|
||||
public static Long getLongId(Object id) {
|
||||
if (id == null || StringUtils.isBlank(id.toString())) {
|
||||
return null;
|
||||
}
|
||||
String[] split = id.split(DictWordType.NATURE_SPILT);
|
||||
String[] split = id.toString().split(Constants.UNDERLINE);
|
||||
return Long.parseLong(split[0]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
Retrieval retrieval = (Retrieval) o;
|
||||
return Double.compare(retrieval.distance, distance) == 0 && Objects.equal(id,
|
||||
retrieval.id) && Objects.equal(query, retrieval.query)
|
||||
&& Objects.equal(metadata, retrieval.metadata);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(id, distance, query, metadata);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,12 +2,15 @@ package com.tencent.supersonic.headless.server.listener;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.DataEvent;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.common.pojo.enums.EventType;
|
||||
import com.tencent.supersonic.common.util.ComponentFactory;
|
||||
import com.tencent.supersonic.common.util.embedding.EmbeddingQuery;
|
||||
import com.tencent.supersonic.common.util.embedding.S2EmbeddingStore;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
@@ -16,10 +19,6 @@ import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class MetaEmbeddingListener implements ApplicationListener<DataEvent> {
|
||||
@@ -38,12 +37,13 @@ public class MetaEmbeddingListener implements ApplicationListener<DataEvent> {
|
||||
if (CollectionUtils.isEmpty(event.getDataItems())) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<EmbeddingQuery> embeddingQueries = event.getDataItems()
|
||||
.stream()
|
||||
.map(dataItem -> {
|
||||
EmbeddingQuery embeddingQuery = new EmbeddingQuery();
|
||||
embeddingQuery.setQueryId(
|
||||
dataItem.getId().toString() + DictWordType.NATURE_SPILT
|
||||
dataItem.getId().toString() + Constants.UNDERLINE
|
||||
+ dataItem.getType().name().toLowerCase());
|
||||
embeddingQuery.setQuery(dataItem.getName());
|
||||
Map meta = JSONObject.parseObject(JSONObject.toJSONString(dataItem), Map.class);
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.tencent.supersonic.headless.server.service;
|
||||
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQuery;
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQueryResult;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Retrieval of metadata embeddings, scoped by view ids.
 */
public interface MetaEmbeddingService {
|
||||
|
||||
/**
 * Runs an embedding retrieval restricted to the given views.
 *
 * @param viewIds       ids of the views the retrieval is limited to
 * @param retrieveQuery the query to execute (carries the query texts)
 * @param num           number of results requested from the embedding store
 *                      (NOTE(review): presumably per query text - confirm against the store)
 * @return the retrieval results for the query
 */
List<RetrieveQueryResult> retrieveQuery(List<Long> viewIds, RetrieveQuery retrieveQuery, int num);
|
||||
|
||||
}
|
||||
@@ -19,6 +19,8 @@ public interface ViewService {
|
||||
|
||||
void delete(Long id, User user);
|
||||
|
||||
List<ViewResp> getViewListByCache(MetaFilter metaFilter);
|
||||
|
||||
List<ViewResp> getViews(User user);
|
||||
|
||||
List<ViewResp> getViewsInheritAuth(User user, Long domainId);
|
||||
|
||||
@@ -7,6 +7,7 @@ import com.github.pagehelper.PageHelper;
|
||||
import com.github.pagehelper.PageInfo;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.DataEvent;
|
||||
import com.tencent.supersonic.common.pojo.DataItem;
|
||||
import com.tencent.supersonic.common.pojo.ModelRela;
|
||||
@@ -72,11 +73,11 @@ public class DimensionServiceImpl implements DimensionService {
|
||||
|
||||
|
||||
public DimensionServiceImpl(DimensionRepository dimensionRepository,
|
||||
ModelService modelService,
|
||||
ChatGptHelper chatGptHelper,
|
||||
DatabaseService databaseService,
|
||||
ModelRelaService modelRelaService,
|
||||
ViewService viewService) {
|
||||
ModelService modelService,
|
||||
ChatGptHelper chatGptHelper,
|
||||
DatabaseService databaseService,
|
||||
ModelRelaService modelRelaService,
|
||||
ViewService viewService) {
|
||||
this.modelService = modelService;
|
||||
this.dimensionRepository = dimensionRepository;
|
||||
this.chatGptHelper = chatGptHelper;
|
||||
@@ -129,8 +130,8 @@ public class DimensionServiceImpl implements DimensionService {
|
||||
DimensionConverter.convert(dimensionDO, dimensionReq);
|
||||
dimensionRepository.updateDimension(dimensionDO);
|
||||
if (!oldName.equals(dimensionDO.getName())) {
|
||||
sendEvent(DataItem.builder().modelId(dimensionDO.getModelId()).newName(dimensionReq.getName())
|
||||
.name(oldName).type(TypeEnums.DIMENSION)
|
||||
sendEvent(DataItem.builder().modelId(dimensionDO.getModelId() + Constants.UNDERLINE)
|
||||
.newName(dimensionReq.getName()).name(oldName).type(TypeEnums.DIMENSION)
|
||||
.id(dimensionDO.getId()).build(), EventType.UPDATE);
|
||||
}
|
||||
}
|
||||
@@ -264,7 +265,7 @@ public class DimensionServiceImpl implements DimensionService {
|
||||
}
|
||||
|
||||
private List<DimensionResp> convertList(List<DimensionDO> dimensionDOS,
|
||||
Map<Long, ModelResp> modelRespMap) {
|
||||
Map<Long, ModelResp> modelRespMap) {
|
||||
List<DimensionResp> dimensionResps = Lists.newArrayList();
|
||||
if (!CollectionUtils.isEmpty(dimensionDOS)) {
|
||||
dimensionResps = dimensionDOS.stream()
|
||||
@@ -364,9 +365,9 @@ public class DimensionServiceImpl implements DimensionService {
|
||||
}
|
||||
|
||||
private void sendEventBatch(List<DimensionDO> dimensionDOS, EventType eventType) {
|
||||
List<DataItem> dataItems = dimensionDOS.stream().map(dimensionDO ->
|
||||
DataItem.builder().id(dimensionDO.getId()).name(dimensionDO.getName())
|
||||
.modelId(dimensionDO.getModelId()).type(TypeEnums.DIMENSION).build())
|
||||
List<DataItem> dataItems = dimensionDOS.stream()
|
||||
.map(dimensionDO -> DataItem.builder().id(dimensionDO.getId()).name(dimensionDO.getName())
|
||||
.modelId(dimensionDO.getModelId() + Constants.UNDERLINE).type(TypeEnums.DIMENSION).build())
|
||||
.collect(Collectors.toList());
|
||||
eventPublisher.publishEvent(new DataEvent(this, dataItems, eventType));
|
||||
}
|
||||
@@ -376,10 +377,4 @@ public class DimensionServiceImpl implements DimensionService {
|
||||
Lists.newArrayList(dataItem), eventType));
|
||||
}
|
||||
|
||||
private DataItem getDataItem(DimensionDO dimensionDO) {
|
||||
return DataItem.builder().id(dimensionDO.getId()).name(dimensionDO.getName())
|
||||
.bizName(dimensionDO.getBizName())
|
||||
.modelId(dimensionDO.getModelId()).type(TypeEnums.DIMENSION).build();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
package com.tencent.supersonic.headless.server.service.impl;
|
||||
|
||||
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
|
||||
import com.tencent.supersonic.common.util.ComponentFactory;
|
||||
import com.tencent.supersonic.common.util.embedding.Retrieval;
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQuery;
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQueryResult;
|
||||
import com.tencent.supersonic.common.util.embedding.S2EmbeddingStore;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.ViewResp;
|
||||
import com.tencent.supersonic.headless.server.pojo.MetaFilter;
|
||||
import com.tencent.supersonic.headless.server.service.MetaEmbeddingService;
|
||||
import com.tencent.supersonic.headless.server.service.ViewService;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class MetaEmbeddingServiceImpl implements MetaEmbeddingService {
|
||||
|
||||
private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore();
|
||||
@Autowired
|
||||
private EmbeddingConfig embeddingConfig;
|
||||
|
||||
@Autowired
|
||||
private ViewService viewService;
|
||||
|
||||
@Override
|
||||
public List<RetrieveQueryResult> retrieveQuery(List<Long> viewIds, RetrieveQuery retrieveQuery, int num) {
|
||||
// viewIds->modelIds
|
||||
MetaFilter metaFilter = new MetaFilter();
|
||||
metaFilter.setStatus(StatusEnum.ONLINE.getCode());
|
||||
metaFilter.setIds(viewIds);
|
||||
List<ViewResp> viewListByCache = viewService.getViewListByCache(metaFilter);
|
||||
Set<Long> allModels = getModels(viewListByCache);
|
||||
|
||||
Map<Long, List<Long>> modelIdToViewIds = viewListByCache.stream()
|
||||
.flatMap(viewResp -> viewResp.getAllModels().stream()
|
||||
.map(modelId -> Pair.of(modelId, viewResp.getId())))
|
||||
.collect(Collectors.groupingBy(Pair::getLeft, Collectors.mapping(Pair::getRight, Collectors.toList())));
|
||||
|
||||
if (CollectionUtils.isNotEmpty(allModels) && allModels.size() == 1) {
|
||||
Map<String, String> filterCondition = new HashMap<>();
|
||||
filterCondition.put("modelId", allModels.stream().findFirst().get().toString());
|
||||
retrieveQuery.setFilterCondition(filterCondition);
|
||||
}
|
||||
|
||||
String collectionName = embeddingConfig.getMetaCollectionName();
|
||||
List<RetrieveQueryResult> resultList = s2EmbeddingStore.retrieveQuery(collectionName, retrieveQuery, num);
|
||||
if (CollectionUtils.isEmpty(resultList)) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
//filter by modelId
|
||||
if (CollectionUtils.isEmpty(allModels)) {
|
||||
return resultList;
|
||||
}
|
||||
return resultList.stream()
|
||||
.map(retrieveQueryResult -> {
|
||||
List<Retrieval> retrievals = retrieveQueryResult.getRetrieval();
|
||||
if (CollectionUtils.isEmpty(retrievals)) {
|
||||
return retrieveQueryResult;
|
||||
}
|
||||
//filter by modelId
|
||||
retrievals.removeIf(retrieval -> {
|
||||
Long modelId = Retrieval.getLongId(retrieval.getMetadata().get("modelId"));
|
||||
if (Objects.isNull(modelId)) {
|
||||
return CollectionUtils.isEmpty(allModels);
|
||||
}
|
||||
return !allModels.contains(modelId);
|
||||
});
|
||||
//add viewId
|
||||
retrievals = retrievals.stream().flatMap(retrieval -> {
|
||||
Long modelId = Retrieval.getLongId(retrieval.getMetadata().get("modelId"));
|
||||
List<Long> viewIdsByModelId = modelIdToViewIds.get(modelId);
|
||||
if (!CollectionUtils.isEmpty(viewIdsByModelId)) {
|
||||
Set<Retrieval> result = new HashSet<>();
|
||||
for (Long viewId : viewIdsByModelId) {
|
||||
Retrieval retrievalNew = new Retrieval();
|
||||
BeanUtils.copyProperties(retrieval, retrievalNew);
|
||||
retrievalNew.getMetadata().putIfAbsent("viewId", viewId + Constants.UNDERLINE);
|
||||
result.add(retrievalNew);
|
||||
}
|
||||
return result.stream();
|
||||
}
|
||||
Set<Retrieval> result = new HashSet<>();
|
||||
result.add(retrieval);
|
||||
return result.stream();
|
||||
}).collect(Collectors.toList());
|
||||
retrieveQueryResult.setRetrieval(retrievals);
|
||||
return retrieveQueryResult;
|
||||
})
|
||||
.filter(retrieveQueryResult -> CollectionUtils.isNotEmpty(retrieveQueryResult.getRetrieval()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private Set<Long> getModels(List<ViewResp> viewListByCache) {
|
||||
return viewListByCache.stream()
|
||||
.flatMap(viewResp -> viewResp.getAllModels().stream())
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,7 @@ import com.github.pagehelper.PageHelper;
|
||||
import com.github.pagehelper.PageInfo;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.DataEvent;
|
||||
import com.tencent.supersonic.common.pojo.DataItem;
|
||||
import com.tencent.supersonic.common.pojo.enums.AuthType;
|
||||
@@ -259,8 +260,8 @@ public class MetricServiceImpl implements MetricService {
|
||||
metricFilter.setModelIds(Lists.newArrayList(modelId));
|
||||
List<MetricResp> metricResps = getMetrics(metricFilter);
|
||||
return metricResps.stream().filter(metricResp ->
|
||||
MetricDefineType.FIELD.equals(metricResp.getMetricDefineType())
|
||||
|| MetricDefineType.MEASURE.equals(metricResp.getMetricDefineType()))
|
||||
MetricDefineType.FIELD.equals(metricResp.getMetricDefineType())
|
||||
|| MetricDefineType.MEASURE.equals(metricResp.getMetricDefineType()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@@ -450,8 +451,8 @@ public class MetricServiceImpl implements MetricService {
|
||||
new HashMap<>(), Lists.newArrayList());
|
||||
return DataItem.builder().id(metricDO.getId()).name(metricDO.getName())
|
||||
.bizName(metricDO.getBizName())
|
||||
.modelId(metricDO.getModelId()).type(TypeEnums.METRIC)
|
||||
.defaultAgg(metricResp.getDefaultAgg()).build();
|
||||
.modelId(metricDO.getModelId() + Constants.UNDERLINE)
|
||||
.type(TypeEnums.METRIC).defaultAgg(metricResp.getDefaultAgg()).build();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -3,6 +3,8 @@ package com.tencent.supersonic.headless.server.service.impl;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.enums.AuthType;
|
||||
@@ -19,23 +21,26 @@ import com.tencent.supersonic.headless.server.persistence.mapper.ViewDOMapper;
|
||||
import com.tencent.supersonic.headless.server.pojo.MetaFilter;
|
||||
import com.tencent.supersonic.headless.server.service.DomainService;
|
||||
import com.tencent.supersonic.headless.server.service.ViewService;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Date;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Service
|
||||
public class ViewServiceImpl
|
||||
extends ServiceImpl<ViewDOMapper, ViewDO> implements ViewService {
|
||||
|
||||
protected final Cache<MetaFilter, List<ViewResp>> viewSchemaCache =
|
||||
CacheBuilder.newBuilder().expireAfterWrite(30, TimeUnit.SECONDS).build();
|
||||
|
||||
@Autowired
|
||||
private DomainService domainService;
|
||||
|
||||
@@ -153,4 +158,14 @@ public class ViewServiceImpl
|
||||
return admins.contains(userName) || viewResp.getCreatedBy().equals(userName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ViewResp> getViewListByCache(MetaFilter metaFilter) {
|
||||
List<ViewResp> viewList = viewSchemaCache.getIfPresent(metaFilter);
|
||||
if (CollectionUtils.isEmpty(viewList)) {
|
||||
viewList = getViewList(metaFilter);
|
||||
viewSchemaCache.put(metaFilter, viewList);
|
||||
}
|
||||
return viewList;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user