(improvement)(chat) Reduce frequent loading of embedding models to improve loading performance. (#1478)

This commit is contained in:
lexluo09
2024-07-30 11:25:03 +08:00
committed by GitHub
parent 23af977972
commit 9a1fac5d4c
6 changed files with 55 additions and 40 deletions

View File

@@ -0,0 +1,16 @@
package dev.langchain4j.provider;
import dev.langchain4j.model.embedding.AllMiniLmL6V2QuantizedEmbeddingModel;
import dev.langchain4j.model.embedding.BgeSmallZhEmbeddingModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
import org.springframework.stereotype.Service;
@Service
public class EmbeddingModelConstant {
public static final String BGE_SMALL_ZH = "bge-small-zh";
public static final String ALL_MINILM_L6_V2 = "all-minilm-l6-v2-q";
public static final EmbeddingModel BGE_SMALL_ZH_MODEL = new BgeSmallZhEmbeddingModel();
public static final EmbeddingModel ALL_MINI_LM_L6_V2_MODEL = new AllMiniLmL6V2QuantizedEmbeddingModel();
}

View File

@@ -3,17 +3,12 @@ package dev.langchain4j.provider;
import com.tencent.supersonic.common.pojo.ChatModelConfig;
import com.tencent.supersonic.common.pojo.EmbeddingModelConfig;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.embedding.AllMiniLmL6V2QuantizedEmbeddingModel;
import dev.langchain4j.model.embedding.BgeSmallZhEmbeddingModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.embedding.S2OnnxEmbeddingModel;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.stereotype.Service;
import static dev.langchain4j.inmemory.spring.InMemoryAutoConfig.ALL_MINILM_L6_V2;
import static dev.langchain4j.inmemory.spring.InMemoryAutoConfig.BGE_SMALL_ZH;
@Service
public class InMemoryModelFactory implements ModelFactory, InitializingBean {
public static final String PROVIDER = "IN_MEMORY";
@@ -31,13 +26,13 @@ public class InMemoryModelFactory implements ModelFactory, InitializingBean {
return new S2OnnxEmbeddingModel(modelPath, vocabularyPath);
}
String modelName = embeddingModel.getModelName();
if (BGE_SMALL_ZH.equalsIgnoreCase(modelName)) {
return new BgeSmallZhEmbeddingModel();
if (EmbeddingModelConstant.BGE_SMALL_ZH.equalsIgnoreCase(modelName)) {
return EmbeddingModelConstant.BGE_SMALL_ZH_MODEL;
}
if (ALL_MINILM_L6_V2.equalsIgnoreCase(modelName)) {
return new AllMiniLmL6V2QuantizedEmbeddingModel();
if (EmbeddingModelConstant.ALL_MINILM_L6_V2.equalsIgnoreCase(modelName)) {
return EmbeddingModelConstant.ALL_MINI_LM_L6_V2_MODEL;
}
return new BgeSmallZhEmbeddingModel();
return EmbeddingModelConstant.BGE_SMALL_ZH_MODEL;
}
@Override