mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-12 20:51:48 +00:00
(improvement)(Headless) Adjust file paths and fix the issue of inserting duplicate data in embeddings. (#918)
This commit is contained in:
@@ -1,10 +1,5 @@
|
|||||||
package com.tencent.supersonic.common.util.embedding;
|
package com.tencent.supersonic.common.util.embedding;
|
||||||
|
|
||||||
import static dev.langchain4j.internal.Utils.randomUUID;
|
|
||||||
import static java.nio.file.StandardOpenOption.CREATE;
|
|
||||||
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
|
|
||||||
import static java.util.Comparator.comparingDouble;
|
|
||||||
|
|
||||||
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
||||||
import com.tencent.supersonic.common.util.ContextUtils;
|
import com.tencent.supersonic.common.util.ContextUtils;
|
||||||
import dev.langchain4j.data.embedding.Embedding;
|
import dev.langchain4j.data.embedding.Embedding;
|
||||||
@@ -13,6 +8,10 @@ import dev.langchain4j.store.embedding.CosineSimilarity;
|
|||||||
import dev.langchain4j.store.embedding.EmbeddingMatch;
|
import dev.langchain4j.store.embedding.EmbeddingMatch;
|
||||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||||
import dev.langchain4j.store.embedding.RelevanceScore;
|
import dev.langchain4j.store.embedding.RelevanceScore;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.collections4.MapUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
@@ -20,6 +19,7 @@ import java.nio.file.Paths;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
@@ -29,9 +29,11 @@ import java.util.Set;
|
|||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.CopyOnWriteArraySet;
|
import java.util.concurrent.CopyOnWriteArraySet;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.apache.commons.collections4.MapUtils;
|
import static dev.langchain4j.internal.Utils.randomUUID;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import static java.nio.file.StandardOpenOption.CREATE;
|
||||||
|
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
|
||||||
|
import static java.util.Comparator.comparingDouble;
|
||||||
|
|
||||||
/***
|
/***
|
||||||
* Implementation of S2EmbeddingStore within the Java process's in-memory.
|
* Implementation of S2EmbeddingStore within the Java process's in-memory.
|
||||||
@@ -134,7 +136,8 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore {
|
|||||||
retrieval.setDistance(1 - embeddingMatch.score());
|
retrieval.setDistance(1 - embeddingMatch.score());
|
||||||
retrieval.setId(embeddingMatch.embeddingId());
|
retrieval.setId(embeddingMatch.embeddingId());
|
||||||
retrieval.setQuery(embeddingMatch.embedded().getQuery());
|
retrieval.setQuery(embeddingMatch.embedded().getQuery());
|
||||||
Map<String, Object> metadata = embeddingMatch.embedded().getMetadata();
|
Map<String, Object> metadata = new HashMap<>();
|
||||||
|
metadata.putAll(embeddingMatch.embedded().getMetadata());
|
||||||
if (filterRetrieval(filterCondition, metadata)) {
|
if (filterRetrieval(filterCondition, metadata)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -179,9 +182,9 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore {
|
|||||||
* Uses a brute force approach by iterating over all embeddings to find the best matches.
|
* Uses a brute force approach by iterating over all embeddings to find the best matches.
|
||||||
*
|
*
|
||||||
* @param <Embedded> The class of the object that has been embedded.
|
* @param <Embedded> The class of the object that has been embedded.
|
||||||
* Typically, it is {@link dev.langchain4j.data.segment.TextSegment}.
|
* Typically, it is {@link dev.langchain4j.data.segment.TextSegment}.
|
||||||
* copy from dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore
|
* copy from dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore
|
||||||
* and fix concurrentModificationException in a multi-threaded environment
|
* and fix concurrentModificationException in a multi-threaded environment
|
||||||
*/
|
*/
|
||||||
public static class InMemoryEmbeddingStore<Embedded> implements EmbeddingStore<Embedded> {
|
public static class InMemoryEmbeddingStore<Embedded> implements EmbeddingStore<Embedded> {
|
||||||
|
|
||||||
@@ -267,7 +270,7 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<EmbeddingMatch<Embedded>> findRelevant(Embedding referenceEmbedding, int maxResults,
|
public List<EmbeddingMatch<Embedded>> findRelevant(Embedding referenceEmbedding, int maxResults,
|
||||||
double minScore) {
|
double minScore) {
|
||||||
|
|
||||||
Comparator<EmbeddingMatch<Embedded>> comparator = comparingDouble(EmbeddingMatch::score);
|
Comparator<EmbeddingMatch<Embedded>> comparator = comparingDouble(EmbeddingMatch::score);
|
||||||
PriorityQueue<EmbeddingMatch<Embedded>> matches = new PriorityQueue<>(comparator);
|
PriorityQueue<EmbeddingMatch<Embedded>> matches = new PriorityQueue<>(comparator);
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
package com.tencent.supersonic;
|
package com.tencent.supersonic.headless.server.listener;
|
||||||
|
|
||||||
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
import com.tencent.supersonic.common.config.EmbeddingConfig;
|
||||||
import com.tencent.supersonic.headless.core.chat.parser.JavaLLMProxy;
|
import com.tencent.supersonic.headless.core.chat.parser.JavaLLMProxy;
|
||||||
Reference in New Issue
Block a user