diff --git a/common/src/main/java/com/tencent/supersonic/common/config/EmbeddingConfig.java b/common/src/main/java/com/tencent/supersonic/common/config/EmbeddingConfig.java index 178117f31..1a032b787 100644 --- a/common/src/main/java/com/tencent/supersonic/common/config/EmbeddingConfig.java +++ b/common/src/main/java/com/tencent/supersonic/common/config/EmbeddingConfig.java @@ -32,4 +32,6 @@ public class EmbeddingConfig { @Value("${embedding.metric.analyzeQuery.nResult:5}") private int metricAnalyzeQueryResultNum; + @Value("${embeddingStore.persistent.path:/tmp}") + private String embeddingStorePersistentPath; } diff --git a/common/src/main/java/com/tencent/supersonic/common/util/embedding/EmbeddingPersistentTask.java b/common/src/main/java/com/tencent/supersonic/common/util/embedding/EmbeddingPersistentTask.java new file mode 100644 index 000000000..502d227f2 --- /dev/null +++ b/common/src/main/java/com/tencent/supersonic/common/util/embedding/EmbeddingPersistentTask.java @@ -0,0 +1,32 @@ +package com.tencent.supersonic.common.util.embedding; + +import com.tencent.supersonic.common.util.ComponentFactory; +import javax.annotation.PreDestroy; +import lombok.extern.slf4j.Slf4j; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +@Component +@Slf4j +public class EmbeddingPersistentTask { + + private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore(); + + @PreDestroy + public void onShutdown() { + embeddingStorePersistentToFile(); + } + + private void embeddingStorePersistentToFile() { + if (s2EmbeddingStore instanceof InMemoryS2EmbeddingStore) { + log.info("start persistentToFile"); + ((InMemoryS2EmbeddingStore) s2EmbeddingStore).persistentToFile(); + log.info("end persistentToFile"); + } + } + + @Scheduled(cron = "${inMemoryEmbeddingStore.persistent.cron:0 0 * * * ?}") + public void executeTask() { + embeddingStorePersistentToFile(); + } +} \ No newline at end of file diff --git a/common/src/main/java/com/tencent/supersonic/common/util/embedding/GsonInMemoryEmbeddingStoreJsonCodec.java b/common/src/main/java/com/tencent/supersonic/common/util/embedding/GsonInMemoryEmbeddingStoreJsonCodec.java index 483cfd4e7..82eea82e1 100644 --- a/common/src/main/java/com/tencent/supersonic/common/util/embedding/GsonInMemoryEmbeddingStoreJsonCodec.java +++ b/common/src/main/java/com/tencent/supersonic/common/util/embedding/GsonInMemoryEmbeddingStoreJsonCodec.java @@ -3,14 +3,13 @@ package com.tencent.supersonic.common.util.embedding; import com.google.gson.Gson; import com.google.gson.reflect.TypeToken; import com.tencent.supersonic.common.util.embedding.InMemoryS2EmbeddingStore.InMemoryEmbeddingStore; -import dev.langchain4j.data.segment.TextSegment; import java.lang.reflect.Type; public class GsonInMemoryEmbeddingStoreJsonCodec implements InMemoryEmbeddingStoreJsonCodec { @Override - public InMemoryEmbeddingStore fromJson(String json) { - Type type = new TypeToken>() { + public InMemoryEmbeddingStore fromJson(String json) { + Type type = new TypeToken>() { }.getType(); return new Gson().fromJson(json, type); } diff --git a/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryEmbeddingStoreJsonCodec.java b/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryEmbeddingStoreJsonCodec.java index b8ff372f4..5432ac26b 100644 --- a/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryEmbeddingStoreJsonCodec.java +++ b/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryEmbeddingStoreJsonCodec.java @@ -1,10 +1,10 @@ package com.tencent.supersonic.common.util.embedding; import com.tencent.supersonic.common.util.embedding.InMemoryS2EmbeddingStore.InMemoryEmbeddingStore; -import dev.langchain4j.data.segment.TextSegment; public interface InMemoryEmbeddingStoreJsonCodec { - InMemoryEmbeddingStore fromJson(String json); + + InMemoryEmbeddingStore fromJson(String json); String toJson(InMemoryEmbeddingStore store); } diff --git a/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryS2EmbeddingStore.java b/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryS2EmbeddingStore.java index 78ec3be6f..c9b544630 100644 --- a/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryS2EmbeddingStore.java +++ b/common/src/main/java/com/tencent/supersonic/common/util/embedding/InMemoryS2EmbeddingStore.java @@ -5,9 +5,9 @@ import static java.nio.file.StandardOpenOption.CREATE; import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING; import static java.util.Comparator.comparingDouble; +import com.tencent.supersonic.common.config.EmbeddingConfig; import com.tencent.supersonic.common.util.ContextUtils; import dev.langchain4j.data.embedding.Embedding; -import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.CosineSimilarity; import dev.langchain4j.store.embedding.EmbeddingMatch; @@ -38,12 +38,44 @@ import org.apache.commons.lang3.StringUtils; @Slf4j public class InMemoryS2EmbeddingStore implements S2EmbeddingStore { + public static final String PERSISTENT_FILE_PRE = "InMemory."; private static Map> collectionNameToStore = new ConcurrentHashMap<>(); @Override public synchronized void addCollection(String collectionName) { - collectionNameToStore.computeIfAbsent(collectionName, k -> new InMemoryEmbeddingStore()); + InMemoryEmbeddingStore embeddingStore = null; + Path filePath = getPersistentPath(collectionName); + try { + if (Files.exists(filePath)) { + embeddingStore = InMemoryEmbeddingStore.fromFile(filePath); + embeddingStore.entries = new CopyOnWriteArrayList<>(embeddingStore.entries); + log.info("embeddingStore reload from file:{}", filePath); + } + } catch (Exception e) { + log.error("load persistentFile error, persistentFile:" + filePath, e); + } + if (Objects.isNull(embeddingStore)) { + embeddingStore = new InMemoryEmbeddingStore(); + } + collectionNameToStore.putIfAbsent(collectionName, embeddingStore); + } + + private Path getPersistentPath(String collectionName) { + EmbeddingConfig embeddingConfig = ContextUtils.getBean(EmbeddingConfig.class); + String persistentFile = PERSISTENT_FILE_PRE + collectionName; + return Paths.get(embeddingConfig.getEmbeddingStorePersistentPath(), persistentFile); + } + + public void persistentToFile() { + for (Entry> entry : collectionNameToStore.entrySet()) { + Path filePath = getPersistentPath(entry.getKey()); + try { + entry.getValue().serializeToFile(filePath); + } catch (Exception e) { + log.error("persistentToFile error, persistentFile:" + filePath, e); + } + } } @Override @@ -179,7 +211,7 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore { } private static final InMemoryEmbeddingStoreJsonCodec CODEC = loadCodec(); - private final List> entries = new CopyOnWriteArrayList<>(); + private List> entries = new CopyOnWriteArrayList<>(); @Override public String add(Embedding embedding) { @@ -289,11 +321,11 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore { return new GsonInMemoryEmbeddingStoreJsonCodec(); } - public static InMemoryEmbeddingStore fromJson(String json) { + public static InMemoryEmbeddingStore fromJson(String json) { return CODEC.fromJson(json); } - public static InMemoryEmbeddingStore fromFile(Path filePath) { + public static InMemoryEmbeddingStore fromFile(Path filePath) { try { String json = new String(Files.readAllBytes(filePath)); return fromJson(json); @@ -302,7 +334,7 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore { } } - public static InMemoryEmbeddingStore fromFile(String filePath) { + public static InMemoryEmbeddingStore fromFile(String filePath) { return fromFile(Paths.get(filePath)); } } diff --git a/launchers/standalone/src/main/resources/application-local.yaml b/launchers/standalone/src/main/resources/application-local.yaml index b6359c50b..ce5ce830d 100644 --- a/launchers/standalone/src/main/resources/application-local.yaml +++ b/launchers/standalone/src/main/resources/application-local.yaml @@ -79,4 +79,8 @@ s2: logging: level: dev.langchain4j: DEBUG - dev.ai4j.openai4j: DEBUG \ No newline at end of file + dev.ai4j.openai4j: DEBUG + +inMemoryEmbeddingStore: + persistent: + path: /tmp \ No newline at end of file