mirror of
https://github.com/tencentmusic/supersonic.git
synced 2026-04-28 20:04:27 +08:00
[improvement][Headless] Embedding supports Chinese by default and fixes the issue of abnormal number recognition (#726)
This commit is contained in:
@@ -51,7 +51,7 @@
|
||||
|
||||
<dependency>
|
||||
<groupId>dev.langchain4j</groupId>
|
||||
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
|
||||
<artifactId>langchain4j-embeddings-bge-small-zh</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@ import static dev.langchain4j.exception.IllegalConfigurationException.illegalCon
|
||||
import static dev.langchain4j.internal.Utils.isNullOrBlank;
|
||||
|
||||
import dev.langchain4j.model.chat.ChatLanguageModel;
|
||||
import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.S2OnnxEmbeddingModel;
|
||||
import dev.langchain4j.model.embedding.BgeSmallZhEmbeddingModel;
|
||||
import dev.langchain4j.model.huggingface.HuggingFaceChatModel;
|
||||
import dev.langchain4j.model.huggingface.HuggingFaceEmbeddingModel;
|
||||
import dev.langchain4j.model.huggingface.HuggingFaceLanguageModel;
|
||||
@@ -248,7 +248,7 @@ public class S2LangChain4jAutoConfiguration {
|
||||
case IN_PROCESS:
|
||||
InProcess inProcess = properties.getEmbeddingModel().getInProcess();
|
||||
if (Objects.isNull(inProcess) || isNullOrBlank(inProcess.getModelPath())) {
|
||||
return new AllMiniLmL6V2EmbeddingModel();
|
||||
return new BgeSmallZhEmbeddingModel();
|
||||
}
|
||||
return new S2OnnxEmbeddingModel(inProcess.getModelPath(), inProcess.getVocabularyPath());
|
||||
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
package com.tencent.supersonic.chat;
|
||||
|
||||
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.NONE;
|
||||
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.SUM;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryFilter;
|
||||
import com.tencent.supersonic.chat.api.pojo.response.ParseResp;
|
||||
@@ -8,21 +11,17 @@ import com.tencent.supersonic.chat.core.query.rule.metric.MetricFilterQuery;
|
||||
import com.tencent.supersonic.chat.core.query.rule.metric.MetricGroupByQuery;
|
||||
import com.tencent.supersonic.chat.core.query.rule.metric.MetricModelQuery;
|
||||
import com.tencent.supersonic.chat.core.query.rule.metric.MetricTopNQuery;
|
||||
import com.tencent.supersonic.util.DataUtils;
|
||||
import com.tencent.supersonic.common.pojo.DateConf;
|
||||
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.QueryType;
|
||||
import org.junit.Assert;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.tencent.supersonic.util.DataUtils;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.NONE;
|
||||
import static com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum.SUM;
|
||||
import org.junit.Assert;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
|
||||
public class MetricTest extends BaseTest {
|
||||
@@ -41,6 +40,7 @@ public class MetricTest extends BaseTest {
|
||||
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
|
||||
expectedParseInfo.getDimensionFilters().add(DataUtils.getFilter("user_name",
|
||||
FilterOperatorEnum.EQUALS, "alice", "用户", 2L));
|
||||
@@ -76,6 +76,7 @@ public class MetricTest extends BaseTest {
|
||||
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
expectedParseInfo.setDateInfo(DataUtils.getDateConf(DateConf.DateMode.RECENT, unit, period, startDay, endDay));
|
||||
expectedParseInfo.setQueryType(QueryType.METRIC);
|
||||
|
||||
@@ -105,6 +106,7 @@ public class MetricTest extends BaseTest {
|
||||
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
expectedParseInfo.getDimensions().add(DataUtils.getSchemaElement("部门"));
|
||||
|
||||
expectedParseInfo.setDateInfo(DataUtils.getDateConf(DateConf.DateMode.RECENT, unit, period, startDay, endDay));
|
||||
@@ -126,6 +128,7 @@ public class MetricTest extends BaseTest {
|
||||
expectedParseInfo.setAggType(NONE);
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
List<String> list = new ArrayList<>();
|
||||
list.add("alice");
|
||||
list.add("lucy");
|
||||
@@ -151,6 +154,7 @@ public class MetricTest extends BaseTest {
|
||||
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
expectedParseInfo.getDimensions().add(DataUtils.getSchemaElement("用户"));
|
||||
|
||||
expectedParseInfo.setDateInfo(DataUtils.getDateConf(3, DateConf.DateMode.RECENT, "DAY"));
|
||||
@@ -171,6 +175,7 @@ public class MetricTest extends BaseTest {
|
||||
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
expectedParseInfo.getDimensions().add(DataUtils.getSchemaElement("部门"));
|
||||
|
||||
expectedParseInfo.setDateInfo(DataUtils.getDateConf(DateConf.DateMode.RECENT, unit, period, startDay, endDay));
|
||||
@@ -197,6 +202,7 @@ public class MetricTest extends BaseTest {
|
||||
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
expectedParseInfo.getDimensionFilters().add(DataUtils.getFilter("user_name",
|
||||
FilterOperatorEnum.EQUALS, "alice", "用户", 2L));
|
||||
|
||||
|
||||
@@ -30,6 +30,7 @@ public class MultiTurnsTest extends BaseTest {
|
||||
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("人均访问次数"));
|
||||
expectedParseInfo.getMetrics().add(DataUtils.getSchemaElement("访问用户数"));
|
||||
|
||||
expectedParseInfo.getDimensionFilters().add(DataUtils.getFilter("user_name",
|
||||
FilterOperatorEnum.EQUALS, "alice", "用户", 2L));
|
||||
|
||||
Reference in New Issue
Block a user