(improvement)(Headless) Adjust file paths and fix the issue of inserting duplicate data in embeddings. (#918)

This commit is contained in:
lexluo09
2024-04-18 10:38:18 +08:00
committed by GitHub
parent ee798b7671
commit f9c60d0c65
2 changed files with 17 additions and 14 deletions

View File

@@ -1,10 +1,5 @@
package com.tencent.supersonic.common.util.embedding;
import static dev.langchain4j.internal.Utils.randomUUID;
import static java.nio.file.StandardOpenOption.CREATE;
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
import static java.util.Comparator.comparingDouble;
import com.tencent.supersonic.common.config.EmbeddingConfig;
import com.tencent.supersonic.common.util.ContextUtils;
import dev.langchain4j.data.embedding.Embedding;
@@ -13,6 +8,10 @@ import dev.langchain4j.store.embedding.CosineSimilarity;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.RelevanceScore;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -20,6 +19,7 @@ import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -29,9 +29,11 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.lang3.StringUtils;
import static dev.langchain4j.internal.Utils.randomUUID;
import static java.nio.file.StandardOpenOption.CREATE;
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
import static java.util.Comparator.comparingDouble;
/***
* In-memory implementation of S2EmbeddingStore that lives inside the Java process.
@@ -134,7 +136,8 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore {
retrieval.setDistance(1 - embeddingMatch.score());
retrieval.setId(embeddingMatch.embeddingId());
retrieval.setQuery(embeddingMatch.embedded().getQuery());
Map<String, Object> metadata = embeddingMatch.embedded().getMetadata();
Map<String, Object> metadata = new HashMap<>();
metadata.putAll(embeddingMatch.embedded().getMetadata());
if (filterRetrieval(filterCondition, metadata)) {
continue;
}

View File

@@ -1,4 +1,4 @@
package com.tencent.supersonic;
package com.tencent.supersonic.headless.server.listener;
import com.tencent.supersonic.common.config.EmbeddingConfig;
import com.tencent.supersonic.headless.core.chat.parser.JavaLLMProxy;