mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-14 13:47:09 +00:00
[improvement][Headless] Embedding supports Chinese by default and fixes the issue of abnormal number recognition (#726)
This commit is contained in:
@@ -8,7 +8,10 @@ import lombok.Data;
|
||||
@Builder
|
||||
public class DataItem {
|
||||
|
||||
private Long id;
|
||||
/***
|
||||
* The value of this field ends with an underscore (_).
|
||||
*/
|
||||
private String id;
|
||||
|
||||
private String bizName;
|
||||
|
||||
@@ -18,6 +21,9 @@ public class DataItem {
|
||||
|
||||
private TypeEnums type;
|
||||
|
||||
/***
|
||||
* The value of this field ends with an underscore (_).
|
||||
*/
|
||||
private String modelId;
|
||||
|
||||
private String defaultAgg;
|
||||
|
||||
@@ -75,7 +75,7 @@ public class SysParameter {
|
||||
parameters.add(new Parameter("embedding.mapper.number", "5",
|
||||
"批量向量召回文本返回结果个数", "每个文本进行向量语义召回的文本结果个数", "number", "Mapper相关配置"));
|
||||
parameters.add(new Parameter("embedding.mapper.distance.threshold",
|
||||
"0.58", "向量召回相似度阈值", "相似度大于该阈值的则舍弃", "number", "Mapper相关配置"));
|
||||
"0.01", "向量召回相似度阈值", "相似度大于该阈值的则舍弃", "number", "Mapper相关配置"));
|
||||
|
||||
//parser config
|
||||
Parameter s2SQLParameter = new Parameter("s2SQL.generation", "TWO_PASS_AUTO_COT",
|
||||
|
||||
@@ -129,7 +129,7 @@ public class InMemoryS2EmbeddingStore implements S2EmbeddingStore {
|
||||
List<Retrieval> retrievals = new ArrayList<>();
|
||||
for (EmbeddingMatch<EmbeddingQuery> embeddingMatch : relevant) {
|
||||
Retrieval retrieval = new Retrieval();
|
||||
retrieval.setDistance(embeddingMatch.score());
|
||||
retrieval.setDistance(1 - embeddingMatch.score());
|
||||
retrieval.setId(embeddingMatch.embeddingId());
|
||||
retrieval.setQuery(embeddingMatch.embedded().getQuery());
|
||||
Map<String, Object> metadata = embeddingMatch.embedded().getMetadata();
|
||||
|
||||
Reference in New Issue
Block a user