(improvement)(project) support for modifying filter conditions and fix group by pushdown and add Windows script (#49)

Co-authored-by: lexluo <lexluo@tencent.com>
This commit is contained in:
lexluo09
2023-09-03 23:51:47 +08:00
committed by GitHub
parent 8440f1f30e
commit 559ef974b0
317 changed files with 7449 additions and 9413 deletions

View File

@@ -10,6 +10,7 @@ import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.utility.Predefine;
import com.hankcs.hanlp.utility.TextUtility;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;

View File

@@ -1,10 +1,9 @@
package com.tencent.supersonic.knowledge;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.service.KnowledgeService;
import com.tencent.supersonic.knowledge.service.SchemaService;
import com.tencent.supersonic.knowledge.service.KnowledgeService;
import com.tencent.supersonic.knowledge.service.WordService;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.springframework.beans.factory.annotation.Autowired;
@@ -13,6 +12,8 @@ import org.springframework.context.ApplicationListener;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.util.List;
@Slf4j
@Component
public class ApplicationStartedInit implements ApplicationListener<ApplicationStartedEvent> {

View File

@@ -1,6 +1,7 @@
package com.tencent.supersonic.knowledge.dictionary;
import java.util.List;
import lombok.Data;

View File

@@ -4,6 +4,7 @@ package com.tencent.supersonic.knowledge.dictionary;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.Data;
@Data

View File

@@ -5,6 +5,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

View File

@@ -16,6 +16,7 @@ import com.hankcs.hanlp.utility.Predefine;
import com.hankcs.hanlp.utility.TextUtility;
import com.tencent.supersonic.knowledge.service.SearchService;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.DataOutputStream;

View File

@@ -1,10 +1,11 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import java.util.ArrayList;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
/**

View File

@@ -1,10 +1,12 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

View File

@@ -1,11 +1,13 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.Objects;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

View File

@@ -1,11 +1,13 @@
package com.tencent.supersonic.knowledge.dictionary.builder;
import com.google.common.collect.Lists;
import java.util.List;
import java.util.Objects;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import java.util.List;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;

View File

@@ -1,6 +1,7 @@
package com.tencent.supersonic.knowledge.persistence.dataobject;
import java.util.Date;
import lombok.Data;
@Data

View File

@@ -1,7 +1,7 @@
package com.tencent.supersonic.knowledge.persistence.mapper;
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
import java.util.List;
import org.apache.ibatis.annotations.Mapper;

View File

@@ -1,10 +1,11 @@
package com.tencent.supersonic.knowledge.persistence.repository;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
import com.tencent.supersonic.knowledge.dictionary.DimValueDictInfo;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import java.util.List;
public interface DictRepository {

View File

@@ -1,19 +1,21 @@
package com.tencent.supersonic.knowledge.persistence.repository;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.knowledge.utils.DictTaskConverter;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.knowledge.dictionary.DictConfig;
import com.tencent.supersonic.knowledge.dictionary.DictTaskFilter;
import com.tencent.supersonic.knowledge.dictionary.DimValueDictInfo;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import com.tencent.supersonic.knowledge.persistence.mapper.DictConfMapper;
import com.tencent.supersonic.knowledge.persistence.mapper.DictTaskMapper;
import com.tencent.supersonic.knowledge.utils.DictTaskConverter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import org.springframework.beans.BeanUtils;
import org.springframework.stereotype.Repository;
import org.springframework.util.CollectionUtils;
@@ -26,7 +28,7 @@ public class DictRepositoryImpl implements DictRepository {
private final DictConfMapper dictConfMapper;
public DictRepositoryImpl(DictTaskMapper dictTaskMapper,
DictConfMapper dictConfMapper) {
DictConfMapper dictConfMapper) {
this.dictTaskMapper = dictTaskMapper;
this.dictConfMapper = dictConfMapper;
}

View File

@@ -7,10 +7,12 @@ import com.tencent.supersonic.chat.api.pojo.ModelSchema;
import com.tencent.supersonic.common.pojo.ResultData;
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.ParameterizedTypeReference;

View File

@@ -3,6 +3,7 @@ package com.tencent.supersonic.knowledge.semantic;
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
import com.tencent.supersonic.chat.api.pojo.SchemaValueMap;
import com.tencent.supersonic.semantic.api.model.pojo.DimValueMap;
import com.tencent.supersonic.semantic.api.model.pojo.Entity;
import com.tencent.supersonic.semantic.api.model.response.DimSchemaResp;
@@ -64,9 +65,16 @@ public class ModelSchemaBuilder {
Set<SchemaElement> dimensionValues = new HashSet<>();
for (DimSchemaResp dim : resp.getDimensions()) {
List<String> alias = new ArrayList<>();
String aliasStr = dim.getAlias();
if (Strings.isNotEmpty(aliasStr)) {
alias = Arrays.asList(aliasStr.split(aliasSplit));
}
Set<String> dimValueAlias = new HashSet<>();
if (!CollectionUtils.isEmpty(dim.getDimValueMaps())) {
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
List<DimValueMap> dimValueMaps = dim.getDimValueMaps();
List<SchemaValueMap> schemaValueMaps = new ArrayList<>();
if (!CollectionUtils.isEmpty(dimValueMaps)) {
for (DimValueMap dimValueMap : dimValueMaps) {
if (Strings.isNotEmpty(dimValueMap.getBizName())) {
dimValueAlias.add(dimValueMap.getBizName());
@@ -74,13 +82,11 @@ public class ModelSchemaBuilder {
if (!CollectionUtils.isEmpty(dimValueMap.getAlias())) {
dimValueAlias.addAll(dimValueMap.getAlias());
}
SchemaValueMap schemaValueMap = new SchemaValueMap();
BeanUtils.copyProperties(dimValueMap, schemaValueMap);
schemaValueMaps.add(schemaValueMap);
}
}
List<String> alias = new ArrayList<>();
String aliasStr = dim.getAlias();
if (Strings.isNotEmpty(aliasStr)) {
alias = Arrays.asList(aliasStr.split(aliasSplit));
}
SchemaElement dimToAdd = SchemaElement.builder()
.model(resp.getId())
@@ -90,6 +96,7 @@ public class ModelSchemaBuilder {
.type(SchemaElementType.DIMENSION)
.useCnt(dim.getUseCnt())
.alias(alias)
.schemaValueMaps(schemaValueMaps)
.build();
dimensions.add(dimToAdd);

View File

@@ -1,6 +1,7 @@
package com.tencent.supersonic.knowledge.service;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import java.util.List;
public interface KnowledgeService {

View File

@@ -3,8 +3,10 @@ package com.tencent.supersonic.knowledge.service;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
import java.util.List;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

View File

@@ -7,10 +7,11 @@ import com.tencent.supersonic.chat.api.component.SemanticLayer;
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.knowledge.utils.ComponentFactory;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.concurrent.TimeUnit;
@Service
@Slf4j
public class SchemaService {

View File

@@ -6,9 +6,11 @@ import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.builder.WordBuilderFactory;
import com.tencent.supersonic.knowledge.utils.ComponentFactory;
import java.util.ArrayList;
import java.util.List;
import com.tencent.supersonic.knowledge.utils.ComponentFactory;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;

View File

@@ -1,9 +1,10 @@
package com.tencent.supersonic.knowledge.utils;
import com.tencent.supersonic.chat.api.component.SemanticLayer;
import org.springframework.core.io.support.SpringFactoriesLoader;
import java.util.List;
import java.util.Objects;
import org.springframework.core.io.support.SpringFactoriesLoader;
public class ComponentFactory {

View File

@@ -9,6 +9,7 @@ import com.tencent.supersonic.knowledge.dictionary.DimValue2DictCommand;
import com.tencent.supersonic.knowledge.dictionary.DimValueInfo;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO;
import com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;

View File

@@ -7,6 +7,7 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
@Slf4j

View File

@@ -7,17 +7,18 @@ import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import com.tencent.supersonic.knowledge.dictionary.DictWordType;
import com.tencent.supersonic.knowledge.dictionary.HadoopFileIOAdapter;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import com.tencent.supersonic.knowledge.dictionary.MultiCustomDictionary;
import com.tencent.supersonic.knowledge.service.SearchService;
import com.tencent.supersonic.knowledge.dictionary.DictWord;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import com.tencent.supersonic.knowledge.dictionary.MapResult;
import com.tencent.supersonic.knowledge.dictionary.HadoopFileIOAdapter;
import com.tencent.supersonic.knowledge.service.SearchService;
import com.tencent.supersonic.knowledge.dictionary.MultiCustomDictionary;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.CollectionUtils;
import org.springframework.util.ResourceUtils;

View File

@@ -8,6 +8,7 @@ import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;

View File

@@ -5,7 +5,7 @@
<mapper namespace="com.tencent.supersonic.knowledge.persistence.mapper.DictConfMapper">
<resultMap id="DictConfPO"
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO">
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictConfDO">
<id column="id" property="id"/>
<result column="model_id" property="modelId"/>
<result column="dim_value_infos" property="dimValueInfos"/>
@@ -18,35 +18,35 @@
<insert id="createDictConf">
insert into s2_dictionary
(`domain_id`, dim_value_infos, created_at, updated_at, created_by, updated_by)
values (#{modelId}, #{dimValueInfos}, #{createdAt}, #{updatedAt}, #{createdBy},
#{updatedBy})
values
(#{modelId}, #{dimValueInfos}, #{createdAt}, #{updatedAt}, #{createdBy}, #{updatedBy})
</insert>
<insert id="upsertDictInfo">
insert into s2_dictionary
insert into s2_dictionary
(`model_id`, dim_value_infos, created_at, updated_at, created_by, updated_by)
values (#{modelId}, #{dimValueInfos}, #{createdAt}, #{updatedAt}, #{createdBy},
#{updatedBy}) on duplicate key
update
dim_value_infos = #{dimValueInfos},
updated_at = #{updatedAt},
updated_by = #{updatedBy}
values
(#{modelId}, #{dimValueInfos}, #{createdAt}, #{updatedAt}, #{createdBy}, #{updatedBy})
on duplicate key update
dim_value_infos = #{dimValueInfos},
updated_at = #{updatedAt},
updated_by = #{updatedBy}
</insert>
<update id="editDictConf">
update s2_dictionary
set dim_value_infos = #{dimValueInfos},
updated_at = #{updatedAt},
updated_by = #{updatedBy}
updated_at = #{updatedAt},
updated_by = #{updatedBy}
where model_id = #{modelId}
and status = 0
and status = 0
</update>
<select id="getDictInfoByModelId" resultMap="DictConfPO">
select *
from s2_dictionary
where model_id = #{modelId}
and status = 0
and status = 0
</select>

View File

@@ -5,7 +5,7 @@
<mapper namespace="com.tencent.supersonic.knowledge.persistence.mapper.DictTaskMapper">
<resultMap id="DimValueDictTaskPO"
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO">
type="com.tencent.supersonic.knowledge.persistence.dataobject.DictTaskDO">
<id column="id" property="id"/>
<result column="name" property="name"/>
<result column="description" property="description"/>
@@ -21,8 +21,8 @@
<insert id="createDimValueTask">
insert into s2_dictionary_task
(`name`, description, command, command_md5, status, created_by, progress, elapsed_ms)
values (#{name}, #{description}, #{command}, #{commandMd5}, #{status}, #{createdBy},
#{progress}, #{elapsedMs})
values
(#{name}, #{description}, #{command}, #{commandMd5}, #{status}, #{createdBy}, #{progress}, #{elapsedMs})
</insert>
<update id="updateTaskStatus">
@@ -54,7 +54,7 @@
and id >= #{id}
</if>
<if test="name != null and name !=''">
and `name` like "%"#{name}"%"
and `name` like "%"#{name}"%"
</if>
<if test="createdBy != null and createdBy !=''">
and created_by = #{createdBy}

View File

@@ -1,40 +1,15 @@
CREATE TABLE IF NOT EXISTS `s2_dictionary`
(
`id` bigint
(
20
) unsigned NOT NULL AUTO_INCREMENT,
`item_id` bigint
(
20
) DEFAULT NULL COMMENT '对应维度id、指标id等',
`type` varchar
(
50
) DEFAULT NULL COMMENT '对应维度、指标等',
`black_list` mediumtext COMMENT '字典黑名单',
`white_list` mediumtext COMMENT '字典白名单',
`rule_list` mediumtext COMMENT '字典规则',
`is_dict_Info` tinyint
(
1
) NOT NULL DEFAULT '0' COMMENT '1-开启写入字典0-不开启',
`created_at` datetime NOT NULL COMMENT '创建时间',
`updated_at` datetime NOT NULL COMMENT '更新时间',
`created_by` varchar
(
100
) NOT NULL COMMENT '创建人',
`updated_by` varchar
(
100
) DEFAULT NULL COMMENT '更新人',
`is_deleted` tinyint
(
1
) NOT NULL DEFAULT '0' COMMENT '1-删除,0-可用',
PRIMARY KEY
(
`id`
)
) COMMENT='字典配置信息表'
CREATE TABLE IF NOT EXISTS `s2_dictionary` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`item_id` bigint(20) DEFAULT NULL COMMENT '对应维度id、指标id等',
`type` varchar(50) DEFAULT NULL COMMENT '对应维度、指标等',
`black_list` mediumtext COMMENT '字典黑名单',
`white_list` mediumtext COMMENT '字典白名单',
`rule_list` mediumtext COMMENT '字典规则',
`is_dict_Info` tinyint(1) NOT NULL DEFAULT '0' COMMENT '1-开启写入字典0-不开启',
`created_at` datetime NOT NULL COMMENT '创建时间',
`updated_at` datetime NOT NULL COMMENT '更新时间',
`created_by` varchar(100) NOT NULL COMMENT '创建人',
`updated_by` varchar(100) DEFAULT NULL COMMENT '更新人',
`is_deleted` tinyint(1) NOT NULL DEFAULT '0' COMMENT '1-删除,0-可用',
PRIMARY KEY (`id`)
) COMMENT='字典配置信息表'

View File

@@ -1,33 +1,11 @@
CREATE TABLE IF NOT EXISTS `s2_dictionary_task`
(
`id` bigint
(
20
) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar
(
255
) NOT NULL COMMENT '任务名称',
`description` varchar
(
255
) NOT NULL COMMENT '任务描述',
`command` mediumtext NOT NULL COMMENT '任务请求参数',
`status` int
(
10
) NOT NULL COMMENT '任务最终运行状态',
`created_at` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`created_by` varchar
(
100
) NOT NULL COMMENT '创建人',
`elapsed_ms` bigint
(
10
) DEFAULT NULL COMMENT '任务耗时',
PRIMARY KEY
(
`id`
)
) COMMENT='字典任务信息表'
CREATE TABLE IF NOT EXISTS `s2_dictionary_task` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL COMMENT '任务名称',
`description` varchar(255) NOT NULL COMMENT '任务描述',
`command` mediumtext NOT NULL COMMENT '任务请求参数',
`status` int(10) NOT NULL COMMENT '任务最终运行状态',
`created_at` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`created_by` varchar(100) NOT NULL COMMENT '创建人',
`elapsed_ms` bigint(10) DEFAULT NULL COMMENT '任务耗时',
PRIMARY KEY (`id`)
)COMMENT='字典任务信息表'