mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-14 22:25:19 +00:00
(improvement)(chat) Use TextSegment uniformly as the embedding input type, while remaining compatible with the queryId parameter. (#1202)
This commit is contained in:
@@ -5,10 +5,18 @@ import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
||||
import com.tencent.supersonic.common.service.EmbeddingService;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import dev.langchain4j.store.embedding.EmbeddingQuery;
|
||||
import dev.langchain4j.data.document.Metadata;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.store.embedding.Retrieval;
|
||||
import dev.langchain4j.store.embedding.RetrieveQuery;
|
||||
import dev.langchain4j.store.embedding.RetrieveQueryResult;
|
||||
import dev.langchain4j.store.embedding.TextSegmentConvert;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
@@ -17,11 +25,6 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.core.io.ClassPathResource;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@@ -44,15 +47,13 @@ public class ExemplarManager {
|
||||
}
|
||||
|
||||
public void addExemplars(List<Exemplar> exemplars, String collectionName) {
|
||||
List<EmbeddingQuery> queries = new ArrayList<>();
|
||||
List<TextSegment> queries = new ArrayList<>();
|
||||
for (int i = 0; i < exemplars.size(); i++) {
|
||||
Exemplar exemplar = exemplars.get(i);
|
||||
String question = exemplar.getQuestion();
|
||||
Map<String, Object> metaDataMap = JsonUtil.toMap(JsonUtil.toString(exemplar), String.class, Object.class);
|
||||
EmbeddingQuery embeddingQuery = new EmbeddingQuery();
|
||||
embeddingQuery.setQueryId(String.valueOf(i));
|
||||
embeddingQuery.setQuery(question);
|
||||
embeddingQuery.setMetadata(metaDataMap);
|
||||
TextSegment embeddingQuery = TextSegment.from(question, new Metadata(metaDataMap));
|
||||
TextSegmentConvert.addQueryId(embeddingQuery, String.valueOf(i));
|
||||
queries.add(embeddingQuery);
|
||||
}
|
||||
embeddingService.addQuery(collectionName, queries);
|
||||
|
||||
@@ -5,8 +5,8 @@ import com.tencent.supersonic.common.pojo.DataEvent;
|
||||
import com.tencent.supersonic.common.pojo.DataItem;
|
||||
import com.tencent.supersonic.common.pojo.enums.EventType;
|
||||
import com.tencent.supersonic.common.service.EmbeddingService;
|
||||
import dev.langchain4j.store.embedding.EmbeddingQuery;
|
||||
import java.util.List;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.store.embedding.TextSegmentConvert;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
@@ -15,6 +15,8 @@ import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class MetaEmbeddingListener implements ApplicationListener<DataEvent> {
|
||||
@@ -35,19 +37,19 @@ public class MetaEmbeddingListener implements ApplicationListener<DataEvent> {
|
||||
if (CollectionUtils.isEmpty(dataItems)) {
|
||||
return;
|
||||
}
|
||||
List<EmbeddingQuery> embeddingQueries = EmbeddingQuery.convertToEmbedding(dataItems);
|
||||
if (CollectionUtils.isEmpty(embeddingQueries)) {
|
||||
List<TextSegment> textSegments = TextSegmentConvert.convertToEmbedding(dataItems);
|
||||
if (CollectionUtils.isEmpty(textSegments)) {
|
||||
return;
|
||||
}
|
||||
sleep();
|
||||
embeddingService.addCollection(embeddingConfig.getMetaCollectionName());
|
||||
if (event.getEventType().equals(EventType.ADD)) {
|
||||
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
|
||||
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(), textSegments);
|
||||
} else if (event.getEventType().equals(EventType.DELETE)) {
|
||||
embeddingService.deleteQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
|
||||
embeddingService.deleteQuery(embeddingConfig.getMetaCollectionName(), textSegments);
|
||||
} else if (event.getEventType().equals(EventType.UPDATE)) {
|
||||
embeddingService.deleteQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
|
||||
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
|
||||
embeddingService.deleteQuery(embeddingConfig.getMetaCollectionName(), textSegments);
|
||||
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(), textSegments);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -5,14 +5,15 @@ import com.tencent.supersonic.common.pojo.DataItem;
|
||||
import com.tencent.supersonic.common.service.EmbeddingService;
|
||||
import com.tencent.supersonic.headless.server.service.DimensionService;
|
||||
import com.tencent.supersonic.headless.server.service.MetricService;
|
||||
import dev.langchain4j.store.embedding.EmbeddingQuery;
|
||||
import java.util.List;
|
||||
import javax.annotation.PreDestroy;
|
||||
import dev.langchain4j.store.embedding.TextSegmentConvert;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import javax.annotation.PreDestroy;
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
public class EmbeddingTask {
|
||||
@@ -55,11 +56,11 @@ public class EmbeddingTask {
|
||||
List<DataItem> metricDataItems = metricService.getDataEvent().getDataItems();
|
||||
|
||||
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(),
|
||||
EmbeddingQuery.convertToEmbedding(metricDataItems));
|
||||
TextSegmentConvert.convertToEmbedding(metricDataItems));
|
||||
|
||||
List<DataItem> dimensionDataItems = dimensionService.getDataEvent().getDataItems();
|
||||
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(),
|
||||
EmbeddingQuery.convertToEmbedding(dimensionDataItems));
|
||||
TextSegmentConvert.convertToEmbedding(dimensionDataItems));
|
||||
} catch (Exception e) {
|
||||
log.error("reload.meta.embedding error", e);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user