(improvement)(chat) Upgrade and optimize the embedding metastore. (#1198)

This commit is contained in:
lexluo09
2024-06-23 21:46:10 +08:00
committed by GitHub
parent 2ae94fb38c
commit 4d6cbf31f7
46 changed files with 3788 additions and 498 deletions

View File

@@ -2,12 +2,11 @@ package com.tencent.supersonic.headless.chat.knowledge;
import com.tencent.supersonic.common.config.EmbeddingConfig;
import com.tencent.supersonic.common.pojo.Constants;
import dev.langchain4j.store.embedding.ComponentFactory;
import com.tencent.supersonic.common.service.EmbeddingService;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import dev.langchain4j.store.embedding.Retrieval;
import dev.langchain4j.store.embedding.RetrieveQuery;
import dev.langchain4j.store.embedding.RetrieveQueryResult;
import dev.langchain4j.store.embedding.S2EmbeddingStore;
import com.tencent.supersonic.headless.chat.knowledge.helper.NatureHelper;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
@@ -26,7 +25,8 @@ import org.springframework.stereotype.Service;
@Slf4j
public class MetaEmbeddingService {
private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore();
@Autowired
private EmbeddingService embeddingService;
@Autowired
private EmbeddingConfig embeddingConfig;
@@ -42,7 +42,7 @@ public class MetaEmbeddingService {
}
String collectionName = embeddingConfig.getMetaCollectionName();
List<RetrieveQueryResult> resultList = s2EmbeddingStore.retrieveQuery(collectionName, retrieveQuery, num);
List<RetrieveQueryResult> resultList = embeddingService.retrieveQuery(collectionName, retrieveQuery, num);
if (CollectionUtils.isEmpty(resultList)) {
return new ArrayList<>();
}

View File

@@ -3,19 +3,12 @@ package com.tencent.supersonic.headless.chat.parser.llm;
import com.fasterxml.jackson.core.type.TypeReference;
import com.tencent.supersonic.common.config.EmbeddingConfig;
import dev.langchain4j.store.embedding.ComponentFactory;
import com.tencent.supersonic.common.service.EmbeddingService;
import com.tencent.supersonic.common.util.JsonUtil;
import dev.langchain4j.store.embedding.EmbeddingQuery;
import dev.langchain4j.store.embedding.Retrieval;
import dev.langchain4j.store.embedding.RetrieveQuery;
import dev.langchain4j.store.embedding.RetrieveQueryResult;
import dev.langchain4j.store.embedding.S2EmbeddingStore;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
@@ -24,6 +17,11 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.ClassPathResource;
import org.springframework.stereotype.Component;
@Slf4j
@Component
@@ -31,7 +29,8 @@ public class ExemplarManager {
private static final String EXAMPLE_JSON_FILE = "s2ql_exemplar.json";
private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore();
@Autowired
private EmbeddingService embeddingService;
private TypeReference<List<Exemplar>> valueTypeRef = new TypeReference<List<Exemplar>>() {
};
@@ -56,7 +55,7 @@ public class ExemplarManager {
embeddingQuery.setMetadata(metaDataMap);
queries.add(embeddingQuery);
}
s2EmbeddingStore.addQuery(collectionName, queries);
embeddingService.addQuery(collectionName, queries);
}
public List<Map<String, String>> recallExemplars(String queryText, int maxResults) {
@@ -64,7 +63,7 @@ public class ExemplarManager {
RetrieveQuery retrieveQuery = RetrieveQuery.builder().queryTextsList(Collections.singletonList(queryText))
.queryEmbeddings(null).build();
List<RetrieveQueryResult> resultList = s2EmbeddingStore.retrieveQuery(collectionName, retrieveQuery,
List<RetrieveQueryResult> resultList = embeddingService.retrieveQuery(collectionName, retrieveQuery,
maxResults);
List<Map<String, String>> result = new ArrayList<>();
if (CollectionUtils.isEmpty(resultList)) {

View File

@@ -4,9 +4,9 @@ import com.tencent.supersonic.common.config.EmbeddingConfig;
import com.tencent.supersonic.common.pojo.DataEvent;
import com.tencent.supersonic.common.pojo.DataItem;
import com.tencent.supersonic.common.pojo.enums.EventType;
import dev.langchain4j.store.embedding.ComponentFactory;
import com.tencent.supersonic.common.service.EmbeddingService;
import dev.langchain4j.store.embedding.EmbeddingQuery;
import dev.langchain4j.store.embedding.S2EmbeddingStore;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
@@ -15,8 +15,6 @@ import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
import java.util.List;
@Component
@Slf4j
public class MetaEmbeddingListener implements ApplicationListener<DataEvent> {
@@ -24,7 +22,8 @@ public class MetaEmbeddingListener implements ApplicationListener<DataEvent> {
@Autowired
private EmbeddingConfig embeddingConfig;
private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore();
@Autowired
private EmbeddingService embeddingService;
@Value("${embedding.operation.sleep.time:3000}")
private Integer embeddingOperationSleepTime;
@@ -41,14 +40,14 @@ public class MetaEmbeddingListener implements ApplicationListener<DataEvent> {
return;
}
sleep();
s2EmbeddingStore.addCollection(embeddingConfig.getMetaCollectionName());
embeddingService.addCollection(embeddingConfig.getMetaCollectionName());
if (event.getEventType().equals(EventType.ADD)) {
s2EmbeddingStore.addQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
} else if (event.getEventType().equals(EventType.DELETE)) {
s2EmbeddingStore.deleteQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
embeddingService.deleteQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
} else if (event.getEventType().equals(EventType.UPDATE)) {
s2EmbeddingStore.deleteQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
s2EmbeddingStore.addQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
embeddingService.deleteQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(), embeddingQueries);
}
}

View File

@@ -2,25 +2,23 @@ package com.tencent.supersonic.headless.server.schedule;
import com.tencent.supersonic.common.config.EmbeddingConfig;
import com.tencent.supersonic.common.pojo.DataItem;
import dev.langchain4j.store.embedding.ComponentFactory;
import dev.langchain4j.store.embedding.EmbeddingQuery;
import dev.langchain4j.store.embedding.InMemoryS2EmbeddingStore;
import dev.langchain4j.store.embedding.S2EmbeddingStore;
import com.tencent.supersonic.common.service.EmbeddingService;
import com.tencent.supersonic.headless.server.service.DimensionService;
import com.tencent.supersonic.headless.server.service.MetricService;
import dev.langchain4j.store.embedding.EmbeddingQuery;
import java.util.List;
import javax.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.PreDestroy;
import java.util.List;
@Component
@Slf4j
public class EmbeddingTask {
private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore();
@Autowired
private EmbeddingService embeddingService;
@Autowired
private EmbeddingConfig embeddingConfig;
@Autowired
@@ -31,23 +29,22 @@ public class EmbeddingTask {
@PreDestroy
public void onShutdown() {
embeddingStorePersistentToFile();
// embeddingStorePersistentToFile();
}
private void embeddingStorePersistentToFile() {
if (s2EmbeddingStore instanceof InMemoryS2EmbeddingStore) {
log.info("start persistentToFile");
((InMemoryS2EmbeddingStore) s2EmbeddingStore).persistentToFile();
log.info("end persistentToFile");
}
}
// private void embeddingStorePersistentToFile() {
// if (embeddingService instanceof InMemoryEmbeddingService) {
// log.info("start persistentToFile");
// ((InMemoryEmbeddingService) embeddingService).persistentToFile();
// log.info("end persistentToFile");
// }
// }
@Scheduled(cron = "${inMemoryEmbeddingStore.persistent.cron:0 0 * * * ?}")
public void executeTask() {
embeddingStorePersistentToFile();
// embeddingStorePersistentToFile();
}
/***
* reload meta embedding
*/
@@ -57,11 +54,11 @@ public class EmbeddingTask {
try {
List<DataItem> metricDataItems = metricService.getDataEvent().getDataItems();
s2EmbeddingStore.addQuery(embeddingConfig.getMetaCollectionName(),
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(),
EmbeddingQuery.convertToEmbedding(metricDataItems));
List<DataItem> dimensionDataItems = dimensionService.getDataEvent().getDataItems();
s2EmbeddingStore.addQuery(embeddingConfig.getMetaCollectionName(),
embeddingService.addQuery(embeddingConfig.getMetaCollectionName(),
EmbeddingQuery.convertToEmbedding(dimensionDataItems));
} catch (Exception e) {
log.error("reload.meta.embedding error", e);