mirror of
https://github.com/tencentmusic/supersonic.git
synced 2026-04-21 14:14:19 +08:00
(improvement)(chat) Reduce frequent loading of embedding models to improve loading performance. (#1478)
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
package dev.langchain4j.provider;
|
||||
|
||||
import dev.langchain4j.model.embedding.AllMiniLmL6V2QuantizedEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.BgeSmallZhEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
public class EmbeddingModelConstant {
|
||||
|
||||
public static final String BGE_SMALL_ZH = "bge-small-zh";
|
||||
public static final String ALL_MINILM_L6_V2 = "all-minilm-l6-v2-q";
|
||||
public static final EmbeddingModel BGE_SMALL_ZH_MODEL = new BgeSmallZhEmbeddingModel();
|
||||
public static final EmbeddingModel ALL_MINI_LM_L6_V2_MODEL = new AllMiniLmL6V2QuantizedEmbeddingModel();
|
||||
|
||||
}
|
||||
@@ -3,17 +3,12 @@ package dev.langchain4j.provider;
|
||||
import com.tencent.supersonic.common.pojo.ChatModelConfig;
|
||||
import com.tencent.supersonic.common.pojo.EmbeddingModelConfig;
|
||||
import dev.langchain4j.model.chat.ChatLanguageModel;
|
||||
import dev.langchain4j.model.embedding.AllMiniLmL6V2QuantizedEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.BgeSmallZhEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.S2OnnxEmbeddingModel;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.InitializingBean;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import static dev.langchain4j.inmemory.spring.InMemoryAutoConfig.ALL_MINILM_L6_V2;
|
||||
import static dev.langchain4j.inmemory.spring.InMemoryAutoConfig.BGE_SMALL_ZH;
|
||||
|
||||
@Service
|
||||
public class InMemoryModelFactory implements ModelFactory, InitializingBean {
|
||||
public static final String PROVIDER = "IN_MEMORY";
|
||||
@@ -31,13 +26,13 @@ public class InMemoryModelFactory implements ModelFactory, InitializingBean {
|
||||
return new S2OnnxEmbeddingModel(modelPath, vocabularyPath);
|
||||
}
|
||||
String modelName = embeddingModel.getModelName();
|
||||
if (BGE_SMALL_ZH.equalsIgnoreCase(modelName)) {
|
||||
return new BgeSmallZhEmbeddingModel();
|
||||
if (EmbeddingModelConstant.BGE_SMALL_ZH.equalsIgnoreCase(modelName)) {
|
||||
return EmbeddingModelConstant.BGE_SMALL_ZH_MODEL;
|
||||
}
|
||||
if (ALL_MINILM_L6_V2.equalsIgnoreCase(modelName)) {
|
||||
return new AllMiniLmL6V2QuantizedEmbeddingModel();
|
||||
if (EmbeddingModelConstant.ALL_MINILM_L6_V2.equalsIgnoreCase(modelName)) {
|
||||
return EmbeddingModelConstant.ALL_MINI_LM_L6_V2_MODEL;
|
||||
}
|
||||
return new BgeSmallZhEmbeddingModel();
|
||||
return EmbeddingModelConstant.BGE_SMALL_ZH_MODEL;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
Reference in New Issue
Block a user