[improvement][common]Add pgvector vector library adaptation. (#1800)

This commit is contained in:
Zhengyang Jia
2024-10-15 09:16:32 +08:00
committed by GitHub
parent 0b71390fde
commit 1ef642d0dd
10 changed files with 169 additions and 12 deletions

View File

@@ -174,6 +174,10 @@
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-milvus</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-pgvector</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-azure-open-ai</artifactId>

View File

@@ -19,7 +19,7 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
public static final Parameter EMBEDDING_STORE_PROVIDER = new Parameter(
"s2.embedding.store.provider", EmbeddingStoreType.IN_MEMORY.name(), "向量库类型",
"目前支持种类型IN_MEMORY、MILVUS、CHROMA", "list", MODULE_NAME, getCandidateValues());
"目前支持种类型IN_MEMORY、MILVUS、CHROMA、PGVECTOR", "list", MODULE_NAME, getCandidateValues());
public static final Parameter EMBEDDING_STORE_BASE_URL =
new Parameter("s2.embedding.store.base.url", "", "BaseUrl", "", "string", MODULE_NAME,
@@ -44,9 +44,18 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
new Parameter("s2.embedding.store.databaseName", "", "DatabaseName", "", "string",
MODULE_NAME, null, getDatabaseNameDependency());
// Port parameter for the embedding store (the label "端口" means "port"); its value type is
// "number" and it has an empty default here. Its dependency list comes from getPostDependency().
// NOTE(review): "POST" in the constant name and in the key "s2.embedding.store.post" looks like
// a misspelling of "PORT" (the value is read into a local named `port` further down). It is
// left as-is because renaming the key or the identifier would break existing references.
public static final Parameter EMBEDDING_STORE_POST =
new Parameter("s2.embedding.store.post", "", "端口", "", "number", MODULE_NAME, null,
getPostDependency());
// User-name parameter for the embedding store (the label "用户名" means "user name"); a
// "string" parameter with an empty default here. Its dependency list comes from
// getUserDependency().
public static final Parameter EMBEDDING_STORE_USER =
new Parameter("s2.embedding.store.user", "", "用户名", "", "string", MODULE_NAME, null,
getUserDependency());
/**
 * Returns the embedding-store parameters declared by this config.
 *
 * @return a new mutable list, in the fixed order: provider, base URL, port, user, API key,
 *         database name, persist path, timeout, dimension
 */
@Override
public List<Parameter> getSysParameters() {
    List<Parameter> sysParameters = Lists.newArrayList(
            EMBEDDING_STORE_PROVIDER,
            EMBEDDING_STORE_BASE_URL,
            EMBEDDING_STORE_POST,
            EMBEDDING_STORE_USER,
            EMBEDDING_STORE_API_KEY,
            EMBEDDING_STORE_DATABASE_NAME,
            EMBEDDING_STORE_PERSIST_PATH,
            EMBEDDING_STORE_TIMEOUT,
            EMBEDDING_STORE_DIMENSION);
    return sysParameters;
}
@@ -62,28 +71,38 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
if (StringUtils.isNumeric(getParameterValue(EMBEDDING_STORE_DIMENSION))) {
dimension = Integer.valueOf(getParameterValue(EMBEDDING_STORE_DIMENSION));
}
Integer port = null;
if (StringUtils.isNumeric(getParameterValue(EMBEDDING_STORE_POST))) {
port = Integer.valueOf(getParameterValue(EMBEDDING_STORE_POST));
}
String user = getParameterValue(EMBEDDING_STORE_USER);
return EmbeddingStoreConfig.builder().provider(provider).baseUrl(baseUrl).apiKey(apiKey)
.persistPath(persistPath).databaseName(databaseName).timeOut(Long.valueOf(timeOut))
.dimension(dimension).build();
.dimension(dimension).post(port).user(user).build();
}
private static ArrayList<String> getCandidateValues() {
return Lists.newArrayList(EmbeddingStoreType.IN_MEMORY.name(),
EmbeddingStoreType.MILVUS.name(), EmbeddingStoreType.CHROMA.name());
EmbeddingStoreType.MILVUS.name(),
EmbeddingStoreType.CHROMA.name(),
EmbeddingStoreType.PGVECTOR.name());
}
private static List<Parameter.Dependency> getBaseUrlDependency() {
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
EmbeddingStoreType.CHROMA.name()),
EmbeddingStoreType.CHROMA.name(),
EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "http://localhost:19530",
EmbeddingStoreType.CHROMA.name(), "http://localhost:8000"));
EmbeddingStoreType.CHROMA.name(), "http://localhost:8000",
EmbeddingStoreType.PGVECTOR.name(), "127.0.0.1"));
}
private static List<Parameter.Dependency> getApiKeyDependency() {
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
Lists.newArrayList(EmbeddingStoreType.MILVUS.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), DEMO));
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(), EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), DEMO,
EmbeddingStoreType.PGVECTOR.name(), DEMO));
}
private static List<Parameter.Dependency> getPathDependency() {
@@ -94,13 +113,29 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
private static List<Parameter.Dependency> getDimensionDependency() {
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
Lists.newArrayList(EmbeddingStoreType.MILVUS.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "384"));
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(), EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "384",
EmbeddingStoreType.PGVECTOR.name(), "768"));
}
private static List<Parameter.Dependency> getDatabaseNameDependency() {
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
Lists.newArrayList(EmbeddingStoreType.MILVUS.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), ""));
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(), EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "",
EmbeddingStoreType.PGVECTOR.name(), "postgres"));
}
/**
 * Builds the dependency list for the port parameter: it depends on the provider parameter,
 * applies only to PGVECTOR, and carries the default value "54333" for that provider.
 *
 * NOTE(review): PostgreSQL's conventional port is 5432; confirm that 54333 is intended.
 *
 * @return the dependencies produced by {@code getDependency}
 */
private static List<Parameter.Dependency> getPostDependency() {
    String pgvector = EmbeddingStoreType.PGVECTOR.name();
    String providerKey = EMBEDDING_STORE_PROVIDER.getName();
    return getDependency(
            providerKey, Lists.newArrayList(pgvector), ImmutableMap.of(pgvector, "54333"));
}
/**
 * Builds the dependency list for the user parameter: it depends on the provider parameter,
 * applies only to PGVECTOR, and carries the default value "pgvector" for that provider.
 *
 * @return the dependencies produced by {@code getDependency}
 */
private static List<Parameter.Dependency> getUserDependency() {
    String pgvector = EmbeddingStoreType.PGVECTOR.name();
    String providerKey = EMBEDDING_STORE_PROVIDER.getName();
    return getDependency(
            providerKey, Lists.newArrayList(pgvector), ImmutableMap.of(pgvector, "pgvector"));
}
}

View File

@@ -22,4 +22,6 @@ public class EmbeddingStoreConfig implements Serializable {
private Long timeOut = 60L;
private Integer dimension;
private String databaseName;
private Integer post;
private String user;
}

View File

@@ -0,0 +1,23 @@
package dev.langchain4j.pgvector.spring;
import dev.langchain4j.store.embedding.pgvector.MetadataStorageConfig;
import lombok.Getter;
import lombok.Setter;
/**
 * Settings holder for a pgvector-backed embedding store.
 *
 * <p>All fields are nullable wrapper/reference types; getters and setters are generated by
 * Lombok.
 */
@Setter
@Getter
class EmbeddingStoreProperties {

    // --- Database connection ---
    private String host;
    private Integer port;
    private String database;
    private String user;
    private String password;

    // --- Table and vector settings ---
    private String table;
    private Integer dimension;

    // --- Index and table-management options ---
    // NOTE(review): semantics are presumably those of the same-named PgVectorEmbeddingStore
    // builder options; confirm against the library documentation.
    private Boolean useIndex;
    private Integer indexListSize;
    private Boolean createTable;
    private Boolean dropTableFirst;

    // --- Metadata storage ---
    private MetadataStorageConfig metadataStorageConfig;
}

View File

@@ -0,0 +1,20 @@
package dev.langchain4j.pgvector.spring;
import dev.langchain4j.store.embedding.EmbeddingStoreFactory;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import static dev.langchain4j.pgvector.spring.Properties.PREFIX;
/**
 * Spring configuration that registers a pgvector {@link EmbeddingStoreFactory} and enables
 * binding of {@link Properties}.
 */
@Configuration
@EnableConfigurationProperties(Properties.class)
public class PgvectorAutoConfig {

    /**
     * Creates the pgvector embedding-store factory from the bound properties.
     *
     * <p>The bean is only registered when the {@code <PREFIX>.embedding-store.host} property is
     * present. The method name (and therefore the default bean name) says "ChatModel" although
     * the bean is an embedding-store factory; it is kept unchanged so the bean name stays stable.
     *
     * @param properties the bound pgvector properties
     * @return a factory backed by the configured embedding-store settings
     */
    @Bean
    @ConditionalOnProperty(PREFIX + ".embedding-store.host")
    EmbeddingStoreFactory pgvectorChatModel(Properties properties) {
        EmbeddingStoreProperties storeProperties = properties.getEmbeddingStore();
        return new PgvectorEmbeddingStoreFactory(storeProperties);
    }
}

View File

@@ -0,0 +1,46 @@
package dev.langchain4j.pgvector.spring;
import com.tencent.supersonic.common.pojo.EmbeddingStoreConfig;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.BaseEmbeddingStoreFactory;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
import org.springframework.beans.BeanUtils;
/**
 * Factory that creates {@link PgVectorEmbeddingStore} instances, one table per collection name.
 *
 * <p>It can be built either from the application-level {@link EmbeddingStoreConfig} or directly
 * from {@link EmbeddingStoreProperties}.
 */
public class PgvectorEmbeddingStoreFactory extends BaseEmbeddingStoreFactory {

    private final EmbeddingStoreProperties storeProperties;

    /**
     * Creates a factory from the generic embedding-store config.
     *
     * @param storeConfig generic config whose fields are mapped onto pgvector settings
     */
    public PgvectorEmbeddingStoreFactory(EmbeddingStoreConfig storeConfig) {
        this(createPropertiesFromConfig(storeConfig));
    }

    /**
     * Creates a factory from pgvector-specific settings.
     *
     * @param storeProperties settings used for every store this factory creates
     */
    public PgvectorEmbeddingStoreFactory(EmbeddingStoreProperties storeProperties) {
        this.storeProperties = storeProperties;
    }

    /**
     * Maps the generic config onto pgvector settings: base URL becomes host, "post" becomes port,
     * database name becomes database, and the API key is used as the password.
     */
    private static EmbeddingStoreProperties createPropertiesFromConfig(
            EmbeddingStoreConfig storeConfig) {
        EmbeddingStoreProperties embeddingStore = new EmbeddingStoreProperties();
        // Copies same-named properties; the differently named ones are mapped explicitly below.
        BeanUtils.copyProperties(storeConfig, embeddingStore);
        embeddingStore.setHost(storeConfig.getBaseUrl());
        embeddingStore.setPort(storeConfig.getPost());
        embeddingStore.setDatabase(storeConfig.getDatabaseName());
        embeddingStore.setUser(storeConfig.getUser());
        embeddingStore.setPassword(storeConfig.getApiKey());
        return embeddingStore;
    }

    /**
     * Builds a pgvector embedding store whose table name is the given collection name.
     *
     * <p>The optional index, table-management and metadata settings are forwarded to the
     * builder; unset ({@code null}) values are passed through unchanged.
     * NOTE(review): this relies on the builder treating null for these options as "use the
     * library default" — confirm against the langchain4j version in use.
     *
     * @param collectionName name used as the table name (the {@code table} property is not used)
     * @return a new embedding store for that table
     */
    @Override
    public EmbeddingStore<TextSegment> createEmbeddingStore(String collectionName) {
        return PgVectorEmbeddingStore.builder()
                .host(storeProperties.getHost())
                .port(storeProperties.getPort())
                .database(storeProperties.getDatabase())
                .user(storeProperties.getUser())
                .password(storeProperties.getPassword())
                .table(collectionName)
                .dimension(storeProperties.getDimension())
                // Previously these configured options were silently ignored.
                .useIndex(storeProperties.getUseIndex())
                .indexListSize(storeProperties.getIndexListSize())
                .createTable(storeProperties.getCreateTable())
                .dropTableFirst(storeProperties.getDropTableFirst())
                .metadataStorageConfig(storeProperties.getMetadataStorageConfig())
                .build();
    }
}

View File

@@ -0,0 +1,16 @@
package dev.langchain4j.pgvector.spring;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.NestedConfigurationProperty;
/**
 * Configuration properties bound under the {@code langchain4j.pgvector} prefix.
 *
 * <p>Getters and setters are generated by Lombok.
 */
@Setter
@Getter
@ConfigurationProperties(prefix = Properties.PREFIX)
public class Properties {

    /** Prefix under which all pgvector properties are bound. */
    static final String PREFIX = "langchain4j.pgvector";

    /** Nested embedding-store settings. */
    @NestedConfigurationProperty
    EmbeddingStoreProperties embeddingStore;
}

View File

@@ -6,6 +6,7 @@ import com.tencent.supersonic.common.util.ContextUtils;
import dev.langchain4j.chroma.spring.ChromaEmbeddingStoreFactory;
import dev.langchain4j.inmemory.spring.InMemoryEmbeddingStoreFactory;
import dev.langchain4j.milvus.spring.MilvusEmbeddingStoreFactory;
import dev.langchain4j.pgvector.spring.PgvectorEmbeddingStoreFactory;
import org.apache.commons.lang3.StringUtils;
import java.util.Map;
@@ -34,6 +35,11 @@ public class EmbeddingStoreFactoryProvider {
return factoryMap.computeIfAbsent(embeddingStoreConfig,
storeConfig -> new MilvusEmbeddingStoreFactory(storeConfig));
}
if (EmbeddingStoreType.PGVECTOR.name().equalsIgnoreCase(embeddingStoreConfig.getProvider())) {
return factoryMap.computeIfAbsent(
embeddingStoreConfig,
storeConfig -> new PgvectorEmbeddingStoreFactory(storeConfig));
}
if (EmbeddingStoreType.IN_MEMORY.name()
.equalsIgnoreCase(embeddingStoreConfig.getProvider())) {
return factoryMap.computeIfAbsent(embeddingStoreConfig,

View File

@@ -1,5 +1,5 @@
package dev.langchain4j.store.embedding;
public enum EmbeddingStoreType {
IN_MEMORY, MILVUS, CHROMA
IN_MEMORY, MILVUS, CHROMA, PGVECTOR
}

View File

@@ -172,6 +172,11 @@
<artifactId>langchain4j-milvus</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-pgvector</artifactId>
<version>${langchain4j.version}</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-chatglm</artifactId>