[improvement]Use QueryWrapper in place of hard-coded SQLs (#1944)
Some checks are pending
supersonic CentOS CI / build (11) (push) Waiting to run
supersonic CentOS CI / build (21) (push) Waiting to run
supersonic CentOS CI / build (8) (push) Waiting to run
supersonic mac CI / build (11) (push) Waiting to run
supersonic mac CI / build (21) (push) Waiting to run
supersonic mac CI / build (8) (push) Waiting to run
supersonic ubuntu CI / build (11) (push) Waiting to run
supersonic ubuntu CI / build (21) (push) Waiting to run
supersonic ubuntu CI / build (8) (push) Waiting to run
supersonic windows CI / build (11) (push) Waiting to run
supersonic windows CI / build (21) (push) Waiting to run
supersonic windows CI / build (8) (push) Waiting to run

* [improvement][launcher]Use API to get element ID avoiding hard-code.

* [fix][launcher]Fix mysql scripts.

* [improvement][launcher]Support DuckDB database and refactor translator code structure.

* [improvement][headless-fe] Revamped the interaction for semantic modeling routing and successfully implemented the switching between dimension and dataset management.

* [improvement][Headless] Add table ddl in Dbschema

* [improvement][Headless] Add get database by type

* [improvement][Headless] Supports automatic batch creation of models based on db table names.

* [improvement][Headless] Supports getting domain by bizName

* [improvement][launcher]Refactor unit tests and demo data.

* [fix][launcher]Change default vector dimension to 512.

* [improvement](Dict) add dimValueAliasMap info for KnowledgeBaseService

* [improvement][headless]Use QueryWrapper to replace hard-code SQL in mapper xml.

* [improvement][chat]Introduce ChatMemory to delegate ChatMemoryDO.

* [fix][common]Fix embedding store sys configs.

* [fix][common]Fix postgres schema, using varchar instead of char.

* [improvement][launcher]Change supersonic docker deployment from mysql to postgres.

* [Fix][launcher]Fix a number of issues related to semantic modeling.

* [Fix][headless]Fix the evaluation logic of agg type.

* [fix][assembly]Fix Dockerfile and add docker compose run script.

* [fix][chat]Fix "multiple assignments to same column "similar_queries".

* [improvement][headless]Use LamdaQueryWrapper to avoid hard-coded column names.

* [improvement][headless]Refactor headless infra to support advanced semantic modelling.

* [improvement][headless]Change class name `Dim` to `Dimension`.

* [improvement][chat]Introduce `TimeFieldMapper` to always map time field.

* [fix][headless]Remove unnecessary dimension existence check.

* [fix][chat]Fix adjusted filters don't take effect.

---------
This commit is contained in:
Jun Zhang
2024-12-08 13:32:29 +08:00
committed by GitHub
parent 0fc29304a8
commit e55f43c737
120 changed files with 844 additions and 5810 deletions

View File

@@ -19,19 +19,6 @@ public class Term {
this.nature = nature;
}
public Term(String word, Nature nature, int offset) {
this.word = word;
this.nature = nature;
this.offset = offset;
}
public Term(String word, Nature nature, int offset, int frequency) {
this.word = word;
this.nature = nature;
this.offset = offset;
this.frequency = frequency;
}
public int length() {
return this.word.length();
}

View File

@@ -38,14 +38,14 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
new Parameter("s2.embedding.store.timeout", "60", "超时时间(秒)", "", "number", MODULE_NAME);
public static final Parameter EMBEDDING_STORE_DIMENSION =
new Parameter("s2.embedding.store.dimension", "", "", "", "number", MODULE_NAME, null,
getDimensionDependency());
new Parameter("s2.embedding.store.dimension", "", "向量维", "", "number", MODULE_NAME,
null, getDimensionDependency());
public static final Parameter EMBEDDING_STORE_DATABASE_NAME =
new Parameter("s2.embedding.store.databaseName", "", "DatabaseName", "", "string",
MODULE_NAME, null, getDatabaseNameDependency());
public static final Parameter EMBEDDING_STORE_POST = new Parameter("s2.embedding.store.post",
"", "端口", "", "number", MODULE_NAME, null, getPostDependency());
public static final Parameter EMBEDDING_STORE_POST = new Parameter("s2.embedding.store.port",
"", "端口", "", "number", MODULE_NAME, null, getPortDependency());
public static final Parameter EMBEDDING_STORE_USER = new Parameter("s2.embedding.store.user",
"", "用户名", "", "string", MODULE_NAME, null, getUserDependency());
@@ -101,10 +101,8 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
private static List<Parameter.Dependency> getApiKeyDependency() {
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), DEMO,
EmbeddingStoreType.PGVECTOR.name(), DEMO));
Lists.newArrayList(EmbeddingStoreType.MILVUS.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), DEMO));
}
private static List<Parameter.Dependency> getPathDependency() {
@@ -118,7 +116,7 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "384",
EmbeddingStoreType.PGVECTOR.name(), "768"));
EmbeddingStoreType.PGVECTOR.name(), "512"));
}
private static List<Parameter.Dependency> getDatabaseNameDependency() {
@@ -129,7 +127,7 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
EmbeddingStoreType.PGVECTOR.name(), "postgres"));
}
private static List<Parameter.Dependency> getPostDependency() {
private static List<Parameter.Dependency> getPortDependency() {
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
Lists.newArrayList(EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.PGVECTOR.name(), "54333"));
@@ -140,12 +138,14 @@ public class EmbeddingStoreParameterConfig extends ParameterConfig {
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "milvus",
EmbeddingStoreType.PGVECTOR.name(), "pgvector"));
EmbeddingStoreType.PGVECTOR.name(), "postgres"));
}
private static List<Parameter.Dependency> getPasswordDependency() {
return getDependency(EMBEDDING_STORE_PROVIDER.getName(),
Lists.newArrayList(EmbeddingStoreType.MILVUS.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "milvus"));
Lists.newArrayList(EmbeddingStoreType.MILVUS.name(),
EmbeddingStoreType.PGVECTOR.name()),
ImmutableMap.of(EmbeddingStoreType.MILVUS.name(), "milvus",
EmbeddingStoreType.PGVECTOR.name(), "postgres"));
}
}

View File

@@ -8,7 +8,6 @@ import lombok.Data;
@Builder
public class DataItem {
/** * This field uses an underscore (_) at the end. */
private String id;
private String bizName;
@@ -19,9 +18,10 @@ public class DataItem {
private TypeEnums type;
/** * This field uses an underscore (_) at the end. */
private String modelId;
private String domainId;
private String defaultAgg;
public String getNewName() {

View File

@@ -1,5 +1,6 @@
package com.tencent.supersonic.common.pojo;
import com.google.common.collect.Lists;
import lombok.Data;
import java.util.List;
@@ -18,5 +19,5 @@ public class ModelRela extends RecordInfo {
// left join, inner join, right join, outer join
private String joinType;
private List<JoinCondition> joinConditions;
private List<JoinCondition> joinConditions = Lists.newArrayList();
}

View File

@@ -1,5 +1,5 @@
package com.tencent.supersonic.common.pojo.enums;
public enum TypeEnums {
METRIC, DIMENSION, TAG_OBJECT, TAG, DOMAIN, DATASET, MODEL, UNKNOWN
METRIC, DIMENSION, TAG, DOMAIN, DATASET, MODEL, UNKNOWN
}

View File

@@ -26,7 +26,7 @@ public class PgvectorEmbeddingStoreFactory extends BaseEmbeddingStoreFactory {
embeddingStore.setPort(storeConfig.getPost());
embeddingStore.setDatabase(storeConfig.getDatabaseName());
embeddingStore.setUser(storeConfig.getUser());
embeddingStore.setPassword(storeConfig.getApiKey());
embeddingStore.setPassword(storeConfig.getPassword());
return embeddingStore;
}

View File

@@ -1,6 +1,7 @@
package dev.langchain4j.store.embedding;
import com.alibaba.fastjson.JSONObject;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.DataItem;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.segment.TextSegment;
@@ -17,10 +18,18 @@ public class TextSegmentConvert {
public static final String QUERY_ID = "queryId";
public static List<TextSegment> convertToEmbedding(List<DataItem> dataItems) {
return dataItems.stream().map(dataItem -> {
Map meta = JSONObject.parseObject(JSONObject.toJSONString(dataItem), Map.class);
TextSegment textSegment = TextSegment.from(dataItem.getName(), new Metadata(meta));
addQueryId(textSegment, dataItem.getId() + dataItem.getType().name().toLowerCase());
return dataItems.stream().map(item -> {
// suffix with underscore to avoid embedding issue
DataItem newItem = DataItem.builder().domainId(item.getDomainId())
.bizName(item.getBizName()).type(item.getType()).newName(item.getNewName())
.defaultAgg(item.getDefaultAgg()).name(item.getName())
.id(item.getId() + Constants.UNDERLINE)
.modelId(item.getModelId() + Constants.UNDERLINE)
.domainId(item.getDomainId() + Constants.UNDERLINE).build();
Map meta = JSONObject.parseObject(JSONObject.toJSONString(newItem), Map.class);
TextSegment textSegment = TextSegment.from(newItem.getName(), new Metadata(meta));
addQueryId(textSegment, newItem.getId() + newItem.getType().name().toLowerCase());
return textSegment;
}).collect(Collectors.toList());
}