[improvement][Headless] Embedding supports Chinese by default and fixes the issue of abnormal number recognition (#726)

This commit is contained in:
lexluo09
2024-02-18 19:51:19 +08:00
committed by GitHub
parent 39158d6877
commit fdb69547e6
19 changed files with 62 additions and 59 deletions

View File

@@ -8,7 +8,10 @@ import lombok.Data;
@Builder
public class DataItem {
private Long id;
/***
* The value stored in this field carries a trailing underscore (_).
*/
private String id;
private String bizName;
@@ -18,6 +21,9 @@ public class DataItem {
private TypeEnums type;
/***
* The value stored in this field carries a trailing underscore (_).
*/
private String modelId;
private String defaultAgg;

View File

@@ -75,7 +75,7 @@ public class SysParameter {
parameters.add(new Parameter("embedding.mapper.number", "5",
"批量向量召回文本返回结果个数", "每个文本进行向量语义召回的文本结果个数", "number", "Mapper相关配置"));
parameters.add(new Parameter("embedding.mapper.distance.threshold",
"0.58", "向量召回相似度阈值", "相似度大于该阈值的则舍弃", "number", "Mapper相关配置"));
"0.01", "向量召回相似度阈值", "相似度大于该阈值的则舍弃", "number", "Mapper相关配置"));
//parser config
Parameter s2SQLParameter = new Parameter("s2SQL.generation", "TWO_PASS_AUTO_COT",

View File

@@ -129,7 +129,7 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore {
List<Retrieval> retrievals = new ArrayList<>();
for (EmbeddingMatch<EmbeddingQuery> embeddingMatch : relevant) {
Retrieval retrieval = new Retrieval();
retrieval.setDistance(embeddingMatch.score());
retrieval.setDistance(1 - embeddingMatch.score());
retrieval.setId(embeddingMatch.embeddingId());
retrieval.setQuery(embeddingMatch.embedded().getQuery());
Map<String, Object> metadata = embeddingMatch.embedded().getMetadata();