mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-13 04:57:28 +00:00
[improvement]Use QueryWrapper in place of hard-coded SQLs (#1944)
Some checks are pending
supersonic CentOS CI / build (11) (push) Waiting to run
supersonic CentOS CI / build (21) (push) Waiting to run
supersonic CentOS CI / build (8) (push) Waiting to run
supersonic mac CI / build (11) (push) Waiting to run
supersonic mac CI / build (21) (push) Waiting to run
supersonic mac CI / build (8) (push) Waiting to run
supersonic ubuntu CI / build (11) (push) Waiting to run
supersonic ubuntu CI / build (21) (push) Waiting to run
supersonic ubuntu CI / build (8) (push) Waiting to run
supersonic windows CI / build (11) (push) Waiting to run
supersonic windows CI / build (21) (push) Waiting to run
supersonic windows CI / build (8) (push) Waiting to run
Some checks are pending
supersonic CentOS CI / build (11) (push) Waiting to run
supersonic CentOS CI / build (21) (push) Waiting to run
supersonic CentOS CI / build (8) (push) Waiting to run
supersonic mac CI / build (11) (push) Waiting to run
supersonic mac CI / build (21) (push) Waiting to run
supersonic mac CI / build (8) (push) Waiting to run
supersonic ubuntu CI / build (11) (push) Waiting to run
supersonic ubuntu CI / build (21) (push) Waiting to run
supersonic ubuntu CI / build (8) (push) Waiting to run
supersonic windows CI / build (11) (push) Waiting to run
supersonic windows CI / build (21) (push) Waiting to run
supersonic windows CI / build (8) (push) Waiting to run
* [improvement][launcher]Use API to get element ID avoiding hard-code. * [fix][launcher]Fix mysql scripts. * [improvement][launcher]Support DuckDB database and refactor translator code structure. * [improvement][headless-fe] Revamped the interaction for semantic modeling routing and successfully implemented the switching between dimension and dataset management. * [improvement][Headless] Add table ddl in Dbschema * [improvement][Headless] Add get database by type * [improvement][Headless] Supports automatic batch creation of models based on db table names. * [improvement][Headless] Supports getting domain by bizName * [improvement][launcher]Refactor unit tests and demo data. * [fix][launcher]Change default vector dimension to 512. * [improvement](Dict) add dimValueAliasMap info for KnowledgeBaseService * [improvement][headless]Use QueryWrapper to replace hard-code SQL in mapper xml. * [improvement][chat]Introduce ChatMemory to delegate ChatMemoryDO. * [fix][common]Fix embedding store sys configs. * [fix][common]Fix postgres schema, using varchar instead of char. * [improvement][launcher]Change supersonic docker deployment from mysql to postgres. * [Fix][launcher]Fix a number of issues related to semantic modeling. * [Fix][headless]Fix the evaluation logic of agg type. * [fix][assembly]Fix Dockerfile and add docker compose run script. * [fix][chat]Fix "multiple assignments to same column "similar_queries". * [improvement][headless]Use LamdaQueryWrapper to avoid hard-coded column names. * [improvement][headless]Refactor headless infra to support advanced semantic modelling. * [improvement][headless]Change class name `Dim` to `Dimension`. * [improvement][chat]Introduce `TimeFieldMapper` to always map time field. * [fix][headless]Remove unnecessary dimension existence check. * [fix][chat]Fix adjusted filters don't take effect. ---------
This commit is contained in:
@@ -19,19 +19,6 @@ public class Term {
|
||||
this.nature = nature;
|
||||
}
|
||||
|
||||
public Term(String word, Nature nature, int offset) {
|
||||
this.word = word;
|
||||
this.nature = nature;
|
||||
this.offset = offset;
|
||||
}
|
||||
|
||||
public Term(String word, Nature nature, int offset, int frequency) {
|
||||
this.word = word;
|
||||
this.nature = nature;
|
||||
this.offset = offset;
|
||||
this.frequency = frequency;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return this.word.length();
|
||||
}
|
||||
|
||||
@@ -38,14 +38,14 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
|
||||
new Parameter("s2.embedding.store.timeout", "60", "超时时间(秒)", "", "number", MODULE_NAME);
|
||||
|
||||
public static final Parameter EMBEDDING_STORE_DIMENSION =
|
||||
new Parameter("s2.embedding.store.dimension", "", "纬度", "", "number", MODULE_NAME, null,
|
||||
getDimensionDependency());
|
||||
new Parameter("s2.embedding.store.dimension", "", "向量维度", "", "number", MODULE_NAME,
|
||||
null, getDimensionDependency());
|
||||
public static final Parameter EMBEDDING_STORE_DATABASE_NAME =
|
||||
new Parameter("s2.embedding.store.databaseName", "", "DatabaseName", "", "string",
|
||||
MODULE_NAME, null, getDatabaseNameDependency());
|
||||
|
||||
public static final Parameter EMBEDDING_STORE_POST = new Parameter("s2.embedding.store.post",
|
||||
"", "端口", "", "number", MODULE_NAME, null, getPostDependency());
|
||||
public static final Parameter EMBEDDING_STORE_POST = new Parameter("s2.embedding.store.port",
|
||||
"", "端口", "", "number", MODULE_NAME, null, getPortDependency());
|
||||
|
||||
public static final Parameter EMBEDDING_STORE_USER = new Parameter("s2.embedding.store.user",
|
||||
"", "用户名", "", "string", MODULE_NAME, null, getUserDependency());
|
||||
@@ -101,10 +101,8 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
|
||||
|
||||
private static List<Parameter.Dependency> getApiKeyDependency() {
|
||||
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
|
||||
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
|
||||
EmbeddingStoreType.PGVECTOR.name()),
|
||||
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), DEMO,
|
||||
EmbeddingStoreType.PGVECTOR.name(), DEMO));
|
||||
Lists.newArrayList(EmbeddingStoreType.MILVUS.name()),
|
||||
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), DEMO));
|
||||
}
|
||||
|
||||
private static List<Parameter.Dependency> getPathDependency() {
|
||||
@@ -118,7 +116,7 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
|
||||
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
|
||||
EmbeddingStoreType.PGVECTOR.name()),
|
||||
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "384",
|
||||
EmbeddingStoreType.PGVECTOR.name(), "768"));
|
||||
EmbeddingStoreType.PGVECTOR.name(), "512"));
|
||||
}
|
||||
|
||||
private static List<Parameter.Dependency> getDatabaseNameDependency() {
|
||||
@@ -129,7 +127,7 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
|
||||
EmbeddingStoreType.PGVECTOR.name(), "postgres"));
|
||||
}
|
||||
|
||||
private static List<Parameter.Dependency> getPostDependency() {
|
||||
private static List<Parameter.Dependency> getPortDependency() {
|
||||
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
|
||||
Lists.newArrayList(EmbeddingStoreType.PGVECTOR.name()),
|
||||
ImmutableMap.of(EmbeddingStoreType.PGVECTOR.name(), "54333"));
|
||||
@@ -140,12 +138,14 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
|
||||
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
|
||||
EmbeddingStoreType.PGVECTOR.name()),
|
||||
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "milvus",
|
||||
EmbeddingStoreType.PGVECTOR.name(), "pgvector"));
|
||||
EmbeddingStoreType.PGVECTOR.name(), "postgres"));
|
||||
}
|
||||
|
||||
private static List<Parameter.Dependency> getPasswordDependency() {
|
||||
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
|
||||
Lists.newArrayList(EmbeddingStoreType.MILVUS.name()),
|
||||
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "milvus"));
|
||||
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
|
||||
EmbeddingStoreType.PGVECTOR.name()),
|
||||
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "milvus",
|
||||
EmbeddingStoreType.PGVECTOR.name(), "postgres"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,6 @@ import lombok.Data;
|
||||
@Builder
|
||||
public class DataItem {
|
||||
|
||||
/** * This field uses an underscore (_) at the end. */
|
||||
private String id;
|
||||
|
||||
private String bizName;
|
||||
@@ -19,9 +18,10 @@ public class DataItem {
|
||||
|
||||
private TypeEnums type;
|
||||
|
||||
/** * This field uses an underscore (_) at the end. */
|
||||
private String modelId;
|
||||
|
||||
private String domainId;
|
||||
|
||||
private String defaultAgg;
|
||||
|
||||
public String getNewName() {
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.tencent.supersonic.common.pojo;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
@@ -18,5 +19,5 @@ public class ModelRela extends RecordInfo {
|
||||
// left join, inner join, right join, outer join
|
||||
private String joinType;
|
||||
|
||||
private List<JoinCondition> joinConditions;
|
||||
private List<JoinCondition> joinConditions = Lists.newArrayList();
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
package com.tencent.supersonic.common.pojo.enums;
|
||||
|
||||
public enum TypeEnums {
|
||||
METRIC, DIMENSION, TAG_OBJECT, TAG, DOMAIN, DATASET, MODEL, UNKNOWN
|
||||
METRIC, DIMENSION, TAG, DOMAIN, DATASET, MODEL, UNKNOWN
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ public class PgvectorEmbeddingStoreFactory extends BaseEmbeddingStoreFactory {
|
||||
embeddingStore.setPort(storeConfig.getPost());
|
||||
embeddingStore.setDatabase(storeConfig.getDatabaseName());
|
||||
embeddingStore.setUser(storeConfig.getUser());
|
||||
embeddingStore.setPassword(storeConfig.getApiKey());
|
||||
embeddingStore.setPassword(storeConfig.getPassword());
|
||||
return embeddingStore;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package dev.langchain4j.store.embedding;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.DataItem;
|
||||
import dev.langchain4j.data.document.Metadata;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
@@ -17,10 +18,18 @@ public class TextSegmentConvert {
|
||||
public static final String QUERY_ID = "queryId";
|
||||
|
||||
public static List<TextSegment> convertToEmbedding(List<DataItem> dataItems) {
|
||||
return dataItems.stream().map(dataItem -> {
|
||||
Map meta = JSONObject.parseObject(JSONObject.toJSONString(dataItem), Map.class);
|
||||
TextSegment textSegment = TextSegment.from(dataItem.getName(), new Metadata(meta));
|
||||
addQueryId(textSegment, dataItem.getId() + dataItem.getType().name().toLowerCase());
|
||||
return dataItems.stream().map(item -> {
|
||||
// suffix with underscore to avoid embedding issue
|
||||
DataItem newItem = DataItem.builder().domainId(item.getDomainId())
|
||||
.bizName(item.getBizName()).type(item.getType()).newName(item.getNewName())
|
||||
.defaultAgg(item.getDefaultAgg()).name(item.getName())
|
||||
.id(item.getId() + Constants.UNDERLINE)
|
||||
.modelId(item.getModelId() + Constants.UNDERLINE)
|
||||
.domainId(item.getDomainId() + Constants.UNDERLINE).build();
|
||||
|
||||
Map meta = JSONObject.parseObject(JSONObject.toJSONString(newItem), Map.class);
|
||||
TextSegment textSegment = TextSegment.from(newItem.getName(), new Metadata(meta));
|
||||
addQueryId(textSegment, newItem.getId() + newItem.getType().name().toLowerCase());
|
||||
return textSegment;
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user