mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-11 03:58:14 +00:00
headless integrates knowledge (#722)
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
package com.tencent.supersonic.headless.api.pojo;

import lombok.Data;

import java.util.ArrayList;
import java.util.List;

/**
 * Optional configuration for importing a dictionary.
 * (Translated from original comment: 导入字典的可选配置)
 */
@Data
public class ItemValueConfig {

    // Metric associated with the import; presumably used to rank/weight the
    // exported values — TODO confirm with the dictionary-building caller.
    private Long metricId;
    // Values to exclude from the dictionary.
    private List<String> blackList = new ArrayList<>();
    // Values to always include in the dictionary.
    private List<String> whiteList = new ArrayList<>();
    // Rule expressions applied during import; semantics not visible here — verify against consumer.
    private List<String> ruleList = new ArrayList<>();
    // Maximum number of values to import; presumably null means unlimited — TODO confirm.
    private Long limit;
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.tencent.supersonic.headless.api.pojo;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
 * Link from a schema element to a related dimension.
 */
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class RelatedSchemaElement {

    // Identifier of the related dimension.
    private Long dimensionId;

    // NOTE(review): Lombok generates isNecessary()/setNecessary() for this field,
    // so JSON frameworks may (de)serialize it as "necessary" rather than
    // "isNecessary" — verify the wire format before renaming.
    private boolean isNecessary;

}
|
||||
@@ -0,0 +1,58 @@
|
||||
package com.tencent.supersonic.headless.api.pojo;

import com.google.common.base.Objects;
import com.google.common.collect.Lists;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.io.Serializable;
import java.util.List;

/**
 * One element of a semantic schema (view, metric, dimension, value, ...).
 *
 * <p>Equality is deliberately based on (view, id, name, bizName, type) only;
 * the remaining fields are descriptive payload and are excluded from
 * equals/hashCode.
 *
 * <p>Fix: removed the redundant {@code @Getter} annotation — {@code @Data}
 * already generates all getters, so behavior is unchanged.
 */
@Data
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class SchemaElement implements Serializable {

    // Identifier of the view this element belongs to.
    private Long view;
    private Long id;
    private String name;
    // Technical (database/business) name of the element.
    private String bizName;
    // Usage count — presumably used to rank popular elements; TODO confirm.
    private Long useCnt;
    private SchemaElementType type;
    private List<String> alias;
    private List<SchemaValueMap> schemaValueMaps;
    private List<RelatedSchemaElement> relatedSchemaElements;

    // Default aggregation for metrics (e.g. SUM) — TODO confirm allowed values.
    private String defaultAgg;

    // Ordering weight.
    private double order;

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        SchemaElement schemaElement = (SchemaElement) o;
        return Objects.equal(view, schemaElement.view) && Objects.equal(id,
                schemaElement.id) && Objects.equal(name, schemaElement.name)
                && Objects.equal(bizName, schemaElement.bizName)
                && Objects.equal(type, schemaElement.type);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(view, id, name, bizName, type);
    }

    /**
     * Returns the element's name wrapped in a mutable single-element list.
     */
    public List<String> getModelNames() {
        return Lists.newArrayList(name);
    }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.tencent.supersonic.headless.api.pojo;

/**
 * Kinds of elements that can appear in a semantic schema.
 */
public enum SchemaElementType {
    VIEW,
    METRIC,
    DIMENSION,
    VALUE,
    ENTITY,
    TAG,
    ID,
    DATE
}
|
||||
@@ -0,0 +1,24 @@
|
||||
package com.tencent.supersonic.headless.api.pojo;

import java.util.ArrayList;
import java.util.List;
import lombok.Data;

/**
 * Mapping between the stored form of a dimension value and the forms used
 * for display and for user queries.
 */
@Data
public class SchemaValueMap {

    /**
     * dimension value as stored in the database
     */
    private String techName;

    /**
     * dimension value used when showing results
     */
    private String bizName;

    /**
     * aliases of the dimension value accepted in user queries
     */
    private List<String> alias = new ArrayList<>();
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package com.tencent.supersonic.headless.api.pojo.request;

import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import lombok.Builder;
import lombok.Data;

import javax.validation.constraints.NotNull;

/**
 * Filter for querying dictionary configuration items.
 * All fields except {@code status} are optional.
 */
@Data
@Builder
public class DictItemFilter {
    // Primary key of the dictionary configuration record.
    private Long id;
    // Item type — see TypeEnums.
    private TypeEnums type;
    // Identifier of the item the dictionary belongs to.
    private Long itemId;
    // Required lifecycle status to filter on — see StatusEnum.
    @NotNull
    private StatusEnum status;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.tencent.supersonic.headless.api.pojo.request;

import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import com.tencent.supersonic.headless.api.pojo.ItemValueConfig;
import lombok.Data;

import javax.validation.constraints.NotNull;

/**
 * Request to create or update the dictionary configuration of an item.
 */
@Data
public class DictItemReq {

    // Primary key; presumably null when creating a new configuration — TODO confirm.
    private Long id;
    @NotNull
    private TypeEnums type;
    @NotNull
    private Long itemId;
    // Optional value-import configuration (black/white lists, limit, ...).
    private ItemValueConfig config;

    /**
     * ONLINE  - keep the dictionary updated
     * OFFLINE - stop updating, but keep the dictionary file
     * DELETED - stop updating and delete the dictionary file
     * (Translated from the original Chinese comment.)
     */
    @NotNull
    private StatusEnum status;
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.tencent.supersonic.headless.api.pojo.request;

import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import lombok.Builder;
import lombok.Data;

import javax.validation.constraints.NotNull;

/**
 * Request to run a dictionary task for a single item.
 */
@Data
@Builder
public class DictSingleTaskReq {
    // Item type — see TypeEnums.
    @NotNull
    private TypeEnums type;
    // Identifier of the item whose dictionary should be (re)built.
    @NotNull
    private Long itemId;
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.tencent.supersonic.headless.api.pojo.request;

import javax.validation.constraints.NotNull;
import lombok.Data;

/**
 * Request to look up values of a dimension element.
 */
@Data
public class DimensionValueReq {

    // Agent issuing the query; optional.
    private Integer agentId;

    // Identifier of the schema element (dimension) being queried.
    // NOTE(review): "elementID" breaks the camelCase convention (elementId) used
    // elsewhere; renaming would change the JSON/accessor contract, so left as is.
    @NotNull
    private Long elementID;

    private Long modelId;

    // Technical name of the dimension; optional.
    private String bizName;

    // Value to search for — exact vs. prefix matching not visible here; TODO confirm.
    @NotNull
    private String value;
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.tencent.supersonic.headless.api.pojo.response;

import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import com.tencent.supersonic.headless.api.pojo.ItemValueConfig;
import lombok.Data;

import javax.validation.constraints.NotNull;

import static com.tencent.supersonic.common.pojo.Constants.UNDERLINE;

/**
 * Dictionary configuration of an item, as returned to callers.
 */
@Data
public class DictItemResp {
    private Long id;

    private Long modelId;

    // Technical name of the item.
    private String bizName;

    @NotNull
    private TypeEnums type;
    @NotNull
    private Long itemId;
    // Optional value-import configuration (black/white lists, limit, ...).
    private ItemValueConfig config;

    /**
     * ONLINE  - keep the dictionary updated
     * OFFLINE - stop updating, but keep the dictionary file
     * DELETED - stop updating and delete the dictionary file
     * (Translated from the original Chinese comment.)
     */
    @NotNull
    private StatusEnum status;

    /**
     * Builds the nature tag for this item, e.g. "_12_34".
     * The leading underscore is intentional (UNDERLINE + modelId + UNDERLINE + itemId);
     * presumably consumed as a HanLP word nature — verify against the knowledge loader.
     */
    public String getNature() {
        return UNDERLINE + modelId + UNDERLINE + itemId;
    }

    /**
     * File name of the value dictionary, e.g. "dic_value_12_METRIC_34".
     */
    public String fetchDictFileName() {
        return String.format("dic_value_%d_%s_%s", modelId, type.name(), itemId);
    }
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.tencent.supersonic.headless.api.pojo.response;


import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.ToString;

import java.util.Date;

/**
 * Status of a dictionary-building task, combined with the item metadata
 * inherited from {@link DictItemResp}.
 *
 * <p>Fix: added {@code @EqualsAndHashCode(callSuper = true)} — without it,
 * Lombok's {@code @Data} generates equals/hashCode that silently ignore all
 * inherited fields (modelId, itemId, type, ...), so two tasks for different
 * items could compare equal.
 */
@Data
@ToString
@EqualsAndHashCode(callSuper = true)
public class DictTaskResp extends DictItemResp {

    private String name;
    private String description;
    // Current state of the task; allowed values not visible here — TODO confirm.
    private String taskStatus;
    private Date createdAt;
    private String createdBy;
    // Wall-clock duration of the task in milliseconds.
    private Long elapsedMs;

}
|
||||
@@ -0,0 +1,50 @@
|
||||
package com.tencent.supersonic.headless.api.pojo.response;

import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.seg.common.Term;
import lombok.Data;

/**
 * A segmented term (word + part-of-speech nature + position), mirroring
 * HanLP's {@link Term} while carrying an explicit frequency.
 */
@Data
public class S2Term {

    public String word;

    // Part-of-speech tag assigned by the segmenter.
    public Nature nature;
    // Character offset of the word in the original text — set by callers; TODO confirm.
    public int offset;
    public int frequency = 0;

    public S2Term() {
    }

    public S2Term(String word, Nature nature) {
        this.word = word;
        this.nature = nature;
    }

    public S2Term(String word, Nature nature, int offset) {
        this.word = word;
        this.nature = nature;
        this.offset = offset;
    }

    public S2Term(String word, Nature nature, int offset, int frequency) {
        this.word = word;
        this.nature = nature;
        this.offset = offset;
        this.frequency = frequency;
    }

    /**
     * Length of the word in characters. NPEs if word is unset.
     */
    public int length() {
        return this.word.length();
    }

    // NOTE(review): this equals() is asymmetric — it can return true against a
    // HanLP Term, but Term.equals(S2Term) will not reciprocate. It also lacks a
    // matching hashCode() (Lombok's @Data-generated hashCode over all fields is
    // used), violating the equals/hashCode contract. Left unchanged because
    // callers may rely on the Term-comparison behavior — confirm before fixing.
    public boolean equals(Object obj) {
        if (obj instanceof Term) {
            Term term = (Term) obj;
            if (this.nature == term.nature && this.word.equals(term.word)) {
                return true;
            }
        }
        return super.equals(obj);
    }
}
|
||||
@@ -75,6 +75,44 @@
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<version>${hadoop.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.zookeeper</groupId>
|
||||
<artifactId>zookeeper</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.curator</groupId>
|
||||
<artifactId>*</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- <dependency>-->
|
||||
<!-- <groupId>com.hankcs</groupId>-->
|
||||
<!-- <artifactId>hanlp</artifactId>-->
|
||||
<!-- <version>${hanlp.version}</version>-->
|
||||
<!-- </dependency>-->
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
<version>${commons.compress.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
package com.tencent.supersonic.headless.core.file;

import java.util.List;

/**
 * Abstraction over dictionary-file storage (local filesystem in the default
 * implementation). Directory locations come from dict.directory.* properties.
 */
public interface FileHandler {

    /**
     * backup files to a specific directory
     * config: dict.directory.backup
     *
     * @param fileName name of the file (relative to the latest directory) to back up
     */
    void backupFile(String fileName);

    /**
     * create a directory
     *
     * @param path directory to create
     */
    void createDir(String path);

    /**
     * @param path path to test
     * @return true if the path exists, false otherwise
     */
    Boolean existPath(String path);

    /**
     * write data to a specific file,
     * config dir: dict.directory.latest
     *
     * @param data lines to write (one per output line)
     * @param fileName target file name within the latest directory
     * @param append true to append to an existing file, false to overwrite
     */
    void writeFile(List<String> data, String fileName, Boolean append);

    /**
     * get the knowledge file root directory
     *
     * @return absolute path of the latest-dictionary directory
     */
    String getDictRootPath();

    /**
     * delete dictionary file
     * automatic backup
     *
     * @param fileName file to delete (backed up first)
     * @return true on completion
     */
    Boolean deleteDictFile(String fileName);

    /**
     * delete files directly without backup
     *
     * @param fileName path of the file to delete
     */
    void deleteFile(String fileName);

}
|
||||
@@ -0,0 +1,130 @@
|
||||
package com.tencent.supersonic.headless.core.file;

import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;

import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * {@link FileHandler} backed by the local filesystem. Directory locations
 * come from {@link LocalFileConfig}.
 *
 * <p>Fixes over the original:
 * <ul>
 *   <li>getWriter adds StandardOpenOption.CREATE for append mode — APPEND alone
 *       throws NoSuchFileException when the file does not exist yet;</li>
 *   <li>null-safe unboxing of the {@code append} flag;</li>
 *   <li>IO failures are logged at warn level with the full stack trace instead
 *       of info-level messages that discarded the cause.</li>
 * </ul>
 */
@Slf4j
@Component
public class FileHandlerImpl implements FileHandler {

    private final LocalFileConfig localFileConfig;

    public FileHandlerImpl(LocalFileConfig localFileConfig) {
        this.localFileConfig = localFileConfig;
    }

    /**
     * Copies the latest dictionary file into the backup directory,
     * replacing any previous backup of the same name.
     */
    @Override
    public void backupFile(String fileName) {
        String dictDirectoryBackup = localFileConfig.getDictDirectoryBackup();
        if (!existPath(dictDirectoryBackup)) {
            createDir(dictDirectoryBackup);
        }

        Path sourcePath = Paths.get(localFileConfig.getDictDirectoryLatest() + "/" + fileName);
        Path targetPath = Paths.get(dictDirectoryBackup + "/" + fileName);
        try {
            Files.copy(sourcePath, targetPath, StandardCopyOption.REPLACE_EXISTING);
            log.info("backupFile successfully! path:{}", targetPath.toAbsolutePath());
        } catch (IOException e) {
            log.warn("Failed to copy file:{}, e:", sourcePath.toAbsolutePath(), e);
        }
    }

    /** Creates the directory and any missing parents. */
    @Override
    public void createDir(String directoryPath) {
        Path path = Paths.get(directoryPath);
        try {
            Files.createDirectories(path);
            log.info("Directory created successfully!");
        } catch (IOException e) {
            log.warn("Failed to create directory:{}, e:", getAbsolutePath(directoryPath), e);
        }
    }

    /** Deletes the file without backup; failures are logged, not thrown. */
    @Override
    public void deleteFile(String filePath) {
        Path path = Paths.get(filePath);
        try {
            Files.delete(path);
            log.info("File:{} deleted successfully!", getAbsolutePath(filePath));
        } catch (IOException e) {
            log.warn("Failed to delete file:{}, e:", getAbsolutePath(filePath), e);
        }
    }

    /** @return true if the path exists on the local filesystem. */
    @Override
    public Boolean existPath(String pathStr) {
        boolean exists = Files.exists(Paths.get(pathStr));
        if (exists) {
            log.info("path:{} exists!", getAbsolutePath(pathStr));
        } else {
            log.info("path:{} not exists!", getAbsolutePath(pathStr));
        }
        return exists;
    }

    /**
     * Writes lines into the latest-dictionary directory, creating the directory
     * if needed and backing up any existing file of the same name first.
     * Empty input is a no-op.
     */
    @Override
    public void writeFile(List<String> lines, String fileName, Boolean append) {
        if (CollectionUtils.isEmpty(lines)) {
            log.info("lines is empty");
            return;
        }
        String dictDirectoryLatest = localFileConfig.getDictDirectoryLatest();
        if (!existPath(dictDirectoryLatest)) {
            createDir(dictDirectoryLatest);
        }
        String filePath = dictDirectoryLatest + "/" + fileName;
        if (existPath(filePath)) {
            backupFile(fileName);
        }
        try (BufferedWriter writer = getWriter(filePath, append)) {
            for (String line : lines) {
                writer.write(line);
                writer.newLine();
            }
            log.info("File:{} written successfully!", getAbsolutePath(filePath));
        } catch (IOException e) {
            log.warn("Failed to write file:{}, e:", getAbsolutePath(filePath), e);
        }
    }

    public String getAbsolutePath(String path) {
        return Paths.get(path).toAbsolutePath().toString();
    }

    @Override
    public String getDictRootPath() {
        return Paths.get(localFileConfig.getDictDirectoryLatest()).toAbsolutePath().toString();
    }

    /** Backs the file up, then deletes it from the latest directory. */
    @Override
    public Boolean deleteDictFile(String fileName) {
        backupFile(fileName);
        deleteFile(localFileConfig.getDictDirectoryLatest() + "/" + fileName);
        return true;
    }

    private BufferedWriter getWriter(String filePath, Boolean append) throws IOException {
        // Boolean.TRUE.equals guards against a null flag (original unboxed and could NPE).
        if (Boolean.TRUE.equals(append)) {
            // CREATE is required alongside APPEND so appending to a missing file
            // creates it instead of throwing NoSuchFileException.
            return Files.newBufferedWriter(Paths.get(filePath), StandardCharsets.UTF_8,
                    StandardOpenOption.CREATE, StandardOpenOption.APPEND);
        }
        return Files.newBufferedWriter(Paths.get(filePath), StandardCharsets.UTF_8);
    }
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package com.tencent.supersonic.headless.core.file;

import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;

/**
 * Locations of the knowledge dictionary directories. Both configured suffixes
 * are resolved against the classpath root returned by getResourceDir().
 */
@Data
@Configuration
@Slf4j
public class LocalFileConfig {


    // NOTE(review): this default starts with "/" while the backup default starts
    // with "./"; both are merely appended to the resource dir so the results are
    // equivalent, but the inconsistency is confusing — consider aligning them.
    @Value("${dict.directory.latest:/data/dictionary/custom}")
    private String dictDirectoryLatest;

    @Value("${dict.directory.backup:./data/dictionary/backup}")
    private String dictDirectoryBackup;

    // Full path of the latest-dictionary directory (resource dir + configured suffix).
    public String getDictDirectoryLatest() {
        return getResourceDir() + dictDirectoryLatest;
    }

    // Full path of the backup directory (resource dir + configured suffix).
    public String getDictDirectoryBackup() {
        return getResourceDir() + dictDirectoryBackup;
    }

    // NOTE(review): getResource("") can return null when running from a fat jar,
    // which would NPE here — TODO confirm deployment always runs with an exploded
    // classpath, or fall back to a filesystem directory.
    private String getResourceDir() {
        //return hanlpPropertiesPath = HanlpHelper.getHanlpPropertiesPath();
        return ClassLoader.getSystemClassLoader().getResource("").getPath();
    }

}
|
||||
@@ -0,0 +1,30 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;

import com.google.common.base.Objects;
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
import lombok.Data;
import lombok.ToString;

/**
 * Mapping result produced by matching against database values.
 * Identity is (name, schemaElement); the inherited detectWord is
 * deliberately excluded from equality.
 */
@Data
@ToString
public class DatabaseMapResult extends MapResult {

    // Schema element the matched value belongs to.
    private SchemaElement schemaElement;

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        DatabaseMapResult that = (DatabaseMapResult) o;
        return Objects.equal(name, that.name) && Objects.equal(schemaElement, that.schemaElement);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(name, schemaElement);
    }
}
|
||||
@@ -0,0 +1,31 @@
|
||||
|
||||
package com.tencent.supersonic.headless.core.knowledge;
|
||||
|
||||
public enum DictUpdateMode {
|
||||
|
||||
OFFLINE_FULL("OFFLINE_FULL"),
|
||||
OFFLINE_MODEL("OFFLINE_MODEL"),
|
||||
REALTIME_ADD("REALTIME_ADD"),
|
||||
REALTIME_DELETE("REALTIME_DELETE"),
|
||||
NOT_SUPPORT("NOT_SUPPORT");
|
||||
|
||||
private String value;
|
||||
|
||||
DictUpdateMode(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public static DictUpdateMode of(String value) {
|
||||
for (DictUpdateMode item : DictUpdateMode.values()) {
|
||||
if (item.value.equalsIgnoreCase(value)) {
|
||||
return item;
|
||||
}
|
||||
}
|
||||
return DictUpdateMode.NOT_SUPPORT;
|
||||
}
|
||||
|
||||
public String getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;

import java.util.Objects;
import lombok.Data;
import lombok.ToString;

/***
 * word nature: a dictionary word together with its part-of-speech information.
 */
@Data
@ToString
public class DictWord {

    private String word;
    // Nature (part-of-speech) tag alone.
    private String nature;
    // Nature plus frequency, presumably in dictionary-file format — TODO confirm.
    private String natureWithFrequency;

    // Identity is (word, natureWithFrequency); the plain nature field is
    // deliberately excluded from equals/hashCode.
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        DictWord that = (DictWord) o;
        return Objects.equals(word, that.word) && Objects.equals(natureWithFrequency, that.natureWithFrequency);
    }

    @Override
    public int hashCode() {
        return Objects.hash(word, natureWithFrequency);
    }
}
|
||||
@@ -0,0 +1,38 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;

import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
 * Dictionary Attribute Util
 */
public class DictionaryAttributeUtil {

    /**
     * Merges two dictionary attributes for the same word: natures of both are
     * combined (on a clash the frequency from {@code add} wins, since it is
     * inserted second) and sorted by descending frequency.
     *
     * NOTE(review): the third Attribute constructor argument (total frequency)
     * is set to the highest single frequency, not the sum — TODO confirm intended.
     * NOTE(review): findFirst().get() assumes at least one nature is present;
     * two empty-nature inputs would throw NoSuchElementException.
     *
     * @param old existing attribute already in the map
     * @param add newly loaded attribute (takes precedence on clashes)
     * @return merged attribute
     */
    public static CoreDictionary.Attribute getAttribute(CoreDictionary.Attribute old, CoreDictionary.Attribute add) {
        Map<Nature, Integer> map = new HashMap<>();
        // add's entries are inserted second, overwriting old's on equal natures.
        IntStream.range(0, old.nature.length).boxed().forEach(i -> map.put(old.nature[i], old.frequency[i]));
        IntStream.range(0, add.nature.length).boxed().forEach(i -> map.put(add.nature[i], add.frequency[i]));
        List<Map.Entry<Nature, Integer>> list = new LinkedList<Map.Entry<Nature, Integer>>(map.entrySet());
        // Sort natures by descending frequency so the most frequent comes first.
        Collections.sort(list, new Comparator<Map.Entry<Nature, Integer>>() {
            public int compare(Map.Entry<Nature, Integer> o1, Map.Entry<Nature, Integer> o2) {
                return o2.getValue() - o1.getValue();
            }
        });
        CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(
                list.stream().map(i -> i.getKey()).collect(Collectors.toList()).toArray(new Nature[0]),
                list.stream().map(i -> i.getValue()).mapToInt(Integer::intValue).toArray(),
                list.stream().map(i -> i.getValue()).findFirst().get());
        if (old.original != null || add.original != null) {
            // Prefer the newer original form when both are present.
            attribute.original = add.original != null ? add.original : old.original;
        }
        return attribute;
    }
}
|
||||
@@ -0,0 +1,34 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;

import com.google.common.base.Objects;
import java.util.Map;
import lombok.Data;
import lombok.ToString;

/**
 * Mapping result retrieved from the embedding store.
 * Identity is the store id only; distance and metadata are ignored in equality.
 */
@Data
@ToString
public class EmbeddingResult extends MapResult {

    // Identifier of the record in the embedding store.
    private String id;

    // Vector distance to the query; lower presumably means more similar —
    // TODO confirm the distance metric with the embedding backend.
    private double distance;

    private Map<String, String> metadata;

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        EmbeddingResult that = (EmbeddingResult) o;
        return Objects.equal(id, that.id);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(id);
    }
}
|
||||
@@ -0,0 +1,32 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;

import com.hankcs.hanlp.corpus.io.IIOAdapter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * HanLP IO adapter that reads/writes dictionary files through the Hadoop
 * filesystem API (the concrete filesystem is inferred from the path URI).
 *
 * NOTE(review): the FileSystem instances obtained below are never closed here;
 * FileSystem.get usually returns a cached shared instance, so this is likely
 * intentional — confirm the FS cache is not disabled in the cluster config.
 */
@Slf4j
public class HadoopFileIOAdapter implements IIOAdapter {

    /** Opens the path for reading; the caller owns and must close the stream. */
    @Override
    public InputStream open(String path) throws IOException {
        log.info("open:{}", path);
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(path), conf);
        return fs.open(new Path(path));
    }

    /** Creates (overwrites) the path for writing; the caller must close the stream. */
    @Override
    public OutputStream create(String path) throws IOException {
        log.info("create:{}", path);
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(path), conf);
        return fs.create(new Path(path));
    }
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;

import com.google.common.base.Objects;
import java.util.List;
import lombok.Data;
import lombok.ToString;

/**
 * Mapping result produced by HanLP-based matching: a dictionary word, its
 * nature tags, and the similarity to the detected query fragment.
 * Identity is (name, natures); offset and similarity are ignored in equality.
 */
@Data
@ToString
public class HanlpMapResult extends MapResult {

    // Nature (part-of-speech) tags attached to the matched word.
    private List<String> natures;
    private int offset = 0;

    // Similarity score between the detected fragment and the matched word.
    private double similarity;

    public HanlpMapResult(String name, List<String> natures, String detectWord) {
        this.name = name;
        this.natures = natures;
        this.detectWord = detectWord;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        HanlpMapResult hanlpMapResult = (HanlpMapResult) o;
        return Objects.equal(name, hanlpMapResult.name) && Objects.equal(natures, hanlpMapResult.natures);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(name, natures);
    }

    // NOTE(review): redundant — Lombok's @Data already generates setOffset.
    public void setOffset(int offset) {
        this.offset = offset;
    }

}
|
||||
@@ -0,0 +1,13 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;

import java.io.Serializable;
import lombok.Data;
import lombok.ToString;

/**
 * Base class for schema-mapping results: the matched name and the fragment of
 * the user query that triggered the match.
 */
@Data
@ToString
public class MapResult implements Serializable {

    // Matched word/element name.
    protected String name;
    // Fragment of the user query detected as matching.
    protected String detectWord;
}
|
||||
@@ -0,0 +1,397 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;
|
||||
|
||||
import static com.hankcs.hanlp.utility.Predefine.logger;
|
||||
|
||||
import com.hankcs.hanlp.HanLP;
|
||||
import com.hankcs.hanlp.collection.trie.DoubleArrayTrie;
|
||||
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
|
||||
import com.hankcs.hanlp.corpus.io.ByteArray;
|
||||
import com.hankcs.hanlp.corpus.io.IOUtil;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.dictionary.other.CharTable;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.hankcs.hanlp.utility.LexiconUtility;
|
||||
import com.hankcs.hanlp.utility.Predefine;
|
||||
import com.hankcs.hanlp.utility.TextUtility;
|
||||
import com.tencent.supersonic.headless.core.knowledge.helper.HanlpHelper;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
public class MultiCustomDictionary extends DynamicCustomDictionary {
|
||||
|
||||
public static int MAX_SIZE = 10;
|
||||
public static Boolean removeDuplicates = true;
|
||||
public static ConcurrentHashMap<String, PriorityQueue<Term>> NATURE_TO_VALUES = new ConcurrentHashMap<>();
|
||||
private static boolean addToSuggesterTrie = true;
|
||||
|
||||
public MultiCustomDictionary() {
|
||||
this(HanLP.Config.CustomDictionaryPath);
|
||||
}
|
||||
|
||||
public MultiCustomDictionary(String... path) {
|
||||
super(path);
|
||||
}
|
||||
|
||||
/***
|
||||
* load dictionary
|
||||
* @param path
|
||||
* @param defaultNature
|
||||
* @param map
|
||||
* @param customNatureCollector
|
||||
* @param addToSuggeterTrie
|
||||
* @return
|
||||
*/
|
||||
public static boolean load(String path, Nature defaultNature, TreeMap<String, CoreDictionary.Attribute> map,
|
||||
LinkedHashSet<Nature> customNatureCollector, boolean addToSuggeterTrie) {
|
||||
try {
|
||||
String splitter = "\\s";
|
||||
if (path.endsWith(".csv")) {
|
||||
splitter = ",";
|
||||
}
|
||||
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(IOUtil.newInputStream(path), "UTF-8"));
|
||||
boolean firstLine = true;
|
||||
|
||||
while (true) {
|
||||
String[] param;
|
||||
do {
|
||||
String line;
|
||||
if ((line = br.readLine()) == null) {
|
||||
br.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (firstLine) {
|
||||
line = IOUtil.removeUTF8BOM(line);
|
||||
firstLine = false;
|
||||
}
|
||||
|
||||
param = line.split(splitter);
|
||||
} while (param[0].length() == 0);
|
||||
|
||||
if (HanLP.Config.Normalization) {
|
||||
param[0] = CharTable.convert(param[0]);
|
||||
}
|
||||
|
||||
int natureCount = (param.length - 1) / 2;
|
||||
CoreDictionary.Attribute attribute;
|
||||
boolean isLetters = isLetters(param[0]);
|
||||
String original = null;
|
||||
String word = getWordBySpace(param[0]);
|
||||
if (isLetters) {
|
||||
original = word;
|
||||
word = word.toLowerCase();
|
||||
}
|
||||
if (natureCount == 0) {
|
||||
attribute = new CoreDictionary.Attribute(defaultNature);
|
||||
} else {
|
||||
attribute = new CoreDictionary.Attribute(natureCount);
|
||||
|
||||
for (int i = 0; i < natureCount; ++i) {
|
||||
attribute.nature[i] = LexiconUtility.convertStringToNature(param[1 + 2 * i],
|
||||
customNatureCollector);
|
||||
attribute.frequency[i] = Integer.parseInt(param[2 + 2 * i]);
|
||||
attribute.totalFrequency += attribute.frequency[i];
|
||||
}
|
||||
}
|
||||
attribute.original = original;
|
||||
|
||||
if (removeDuplicates && map.containsKey(word)) {
|
||||
attribute = DictionaryAttributeUtil.getAttribute(map.get(word), attribute);
|
||||
}
|
||||
map.put(word, attribute);
|
||||
if (addToSuggeterTrie) {
|
||||
SearchService.put(word, attribute);
|
||||
}
|
||||
for (int i = 0; i < attribute.nature.length; i++) {
|
||||
Nature nature = attribute.nature[i];
|
||||
PriorityQueue<Term> priorityQueue = NATURE_TO_VALUES.get(nature.toString());
|
||||
if (Objects.isNull(priorityQueue)) {
|
||||
priorityQueue = new PriorityQueue<>(MAX_SIZE,
|
||||
Comparator.comparingInt(Term::getFrequency).reversed());
|
||||
NATURE_TO_VALUES.put(nature.toString(), priorityQueue);
|
||||
}
|
||||
Term term = new Term(word, nature);
|
||||
term.setFrequency(attribute.frequency[i]);
|
||||
if (!priorityQueue.contains(term) && priorityQueue.size() < MAX_SIZE) {
|
||||
priorityQueue.add(term);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception var12) {
|
||||
logger.severe("自定义词典" + path + "读取错误!" + var12);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean load(String... path) {
|
||||
this.path = path;
|
||||
long start = System.currentTimeMillis();
|
||||
if (!this.loadMainDictionary(path[0])) {
|
||||
Predefine.logger.warning("自定义词典" + Arrays.toString(path) + "加载失败");
|
||||
return false;
|
||||
} else {
|
||||
Predefine.logger.info(
|
||||
"自定义词典加载成功:" + this.dat.size() + "个词条,耗时" + (System.currentTimeMillis() - start) + "ms");
|
||||
this.path = path;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
* load main dictionary
|
||||
* @param mainPath
|
||||
* @param path
|
||||
* @param dat
|
||||
* @param isCache
|
||||
* @param addToSuggestTrie
|
||||
* @return
|
||||
*/
|
||||
public static boolean loadMainDictionary(String mainPath, String[] path,
|
||||
DoubleArrayTrie<CoreDictionary.Attribute> dat, boolean isCache,
|
||||
boolean addToSuggestTrie) {
|
||||
Predefine.logger.info("自定义词典开始加载:" + mainPath);
|
||||
if (loadDat(mainPath, dat)) {
|
||||
return true;
|
||||
} else {
|
||||
TreeMap<String, CoreDictionary.Attribute> map = new TreeMap();
|
||||
LinkedHashSet customNatureCollector = new LinkedHashSet();
|
||||
|
||||
try {
|
||||
for (String p : path) {
|
||||
Nature defaultNature = Nature.n;
|
||||
File file = new File(p);
|
||||
String fileName = file.getName();
|
||||
int cut = fileName.lastIndexOf(32);
|
||||
if (cut > 0) {
|
||||
String nature = fileName.substring(cut + 1);
|
||||
p = file.getParent() + File.separator + fileName.substring(0, cut);
|
||||
|
||||
try {
|
||||
defaultNature = LexiconUtility.convertStringToNature(nature, customNatureCollector);
|
||||
} catch (Exception var16) {
|
||||
Predefine.logger.severe("配置文件【" + p + "】写错了!" + var16);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
Predefine.logger.info("以默认词性[" + defaultNature + "]加载自定义词典" + p + "中……");
|
||||
boolean success = load(p, defaultNature, map, customNatureCollector, addToSuggestTrie);
|
||||
if (!success) {
|
||||
Predefine.logger.warning("失败:" + p);
|
||||
}
|
||||
}
|
||||
|
||||
if (map.size() == 0) {
|
||||
Predefine.logger.warning("没有加载到任何词条");
|
||||
map.put("未##它", null);
|
||||
}
|
||||
|
||||
logger.info("正在构建DoubleArrayTrie……");
|
||||
dat.build(map);
|
||||
if (addToSuggestTrie) {
|
||||
// SearchService.save();
|
||||
}
|
||||
if (isCache) {
|
||||
// 缓存成dat文件,下次加载会快很多
|
||||
logger.info("正在缓存词典为dat文件……");
|
||||
// 缓存值文件
|
||||
List<CoreDictionary.Attribute> attributeList = new LinkedList<CoreDictionary.Attribute>();
|
||||
for (Map.Entry<String, CoreDictionary.Attribute> entry : map.entrySet()) {
|
||||
attributeList.add(entry.getValue());
|
||||
}
|
||||
|
||||
DataOutputStream out = new DataOutputStream(
|
||||
new BufferedOutputStream(IOUtil.newOutputStream(mainPath + ".bin")));
|
||||
if (customNatureCollector.isEmpty()) {
|
||||
for (int i = Nature.begin.ordinal() + 1; i < Nature.values().length; ++i) {
|
||||
Nature nature = Nature.values()[i];
|
||||
if (Objects.nonNull(nature)) {
|
||||
customNatureCollector.add(nature);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
IOUtil.writeCustomNature(out, customNatureCollector);
|
||||
out.writeInt(attributeList.size());
|
||||
|
||||
for (CoreDictionary.Attribute attribute : attributeList) {
|
||||
attribute.save(out);
|
||||
}
|
||||
|
||||
dat.save(out);
|
||||
out.close();
|
||||
}
|
||||
} catch (FileNotFoundException var17) {
|
||||
logger.severe("自定义词典" + mainPath + "不存在!" + var17);
|
||||
return false;
|
||||
} catch (IOException var18) {
|
||||
logger.severe("自定义词典" + mainPath + "读取错误!" + var18);
|
||||
return false;
|
||||
} catch (Exception var19) {
|
||||
logger.warning("自定义词典" + mainPath + "缓存失败!\n" + TextUtility.exceptionToString(var19));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean loadMainDictionary(String mainPath) {
|
||||
return loadMainDictionary(mainPath, this.path, this.dat, true, addToSuggesterTrie);
|
||||
}
|
||||
|
||||
public static boolean loadDat(String path, DoubleArrayTrie<CoreDictionary.Attribute> dat) {
|
||||
return loadDat(path, HanLP.Config.CustomDictionaryPath, dat);
|
||||
}
|
||||
|
||||
public static boolean loadDat(String path, String[] customDicPath, DoubleArrayTrie<CoreDictionary.Attribute> dat) {
|
||||
try {
|
||||
if (HanLP.Config.CustomDictionaryAutoRefreshCache && isDicNeedUpdate(path, customDicPath)) {
|
||||
return false;
|
||||
} else {
|
||||
ByteArray byteArray = ByteArray.createByteArray(path + ".bin");
|
||||
if (byteArray == null) {
|
||||
return false;
|
||||
} else {
|
||||
int size = byteArray.nextInt();
|
||||
if (size < 0) {
|
||||
while (true) {
|
||||
++size;
|
||||
if (size > 0) {
|
||||
size = byteArray.nextInt();
|
||||
break;
|
||||
}
|
||||
|
||||
Nature.create(byteArray.nextString());
|
||||
}
|
||||
}
|
||||
|
||||
CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[size];
|
||||
Nature[] natureIndexArray = Nature.values();
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
int currentTotalFrequency = byteArray.nextInt();
|
||||
int length = byteArray.nextInt();
|
||||
attributes[i] = new CoreDictionary.Attribute(length);
|
||||
attributes[i].totalFrequency = currentTotalFrequency;
|
||||
|
||||
for (int j = 0; j < length; ++j) {
|
||||
attributes[i].nature[j] = natureIndexArray[byteArray.nextInt()];
|
||||
attributes[i].frequency[j] = byteArray.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
if (!dat.load(byteArray, attributes)) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception var11) {
|
||||
logger.warning("读取失败,问题发生在" + TextUtility.exceptionToString(var11));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isLetters(String str) {
|
||||
char[] chars = str.toCharArray();
|
||||
if (chars.length <= 1) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < chars.length; i++) {
|
||||
if ((chars[i] >= 'A' && chars[i] <= 'Z')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static boolean isLowerLetter(String str) {
|
||||
char[] chars = str.toCharArray();
|
||||
for (int i = 0; i < chars.length; i++) {
|
||||
if ((chars[i] >= 'a' && chars[i] <= 'z')) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static String getWordBySpace(String word) {
|
||||
if (word.contains(HanlpHelper.SPACE_SPILT)) {
|
||||
return word.replace(HanlpHelper.SPACE_SPILT, " ");
|
||||
}
|
||||
return word;
|
||||
}
|
||||
|
||||
public boolean reload() {
|
||||
if (this.path != null && this.path.length != 0) {
|
||||
IOUtil.deleteFile(this.path[0] + ".bin");
|
||||
Boolean loadCacheOk = this.loadDat(this.path[0], this.path, this.dat);
|
||||
if (!loadCacheOk) {
|
||||
return this.loadMainDictionary(this.path[0], this.path, this.dat, true, addToSuggesterTrie);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
/**
 * Inserts (or merges) a single word into the in-memory custom dictionary.
 *
 * <p>Words already present in the immutable double-array trie get their
 * attribute merged in place; new words go into the mutable BinTrie. Words
 * containing ASCII uppercase letters are stored lowercased, with the original
 * form kept in {@code att.original}. "#" placeholders are turned back into
 * spaces via {@code getWordBySpace}.
 *
 * @param word                the surface form; may be null (rejected)
 * @param natureWithFrequency "nature frequency" pairs; null means Nature.nz with frequency 1
 * @return true when inserted/merged; false for a null word or unparsable natures
 */
public synchronized boolean insert(String word, String natureWithFrequency) {
    if (word == null) {
        return false;
    } else {
        // Normalize characters (e.g. full-width -> half-width) when HanLP is configured to.
        if (HanLP.Config.Normalization) {
            word = CharTable.convert(word);
        }
        CoreDictionary.Attribute att = natureWithFrequency == null ? new CoreDictionary.Attribute(Nature.nz, 1)
                : CoreDictionary.Attribute.create(natureWithFrequency);
        // Decide case-folding BEFORE the space-placeholder substitution.
        boolean isLetters = isLetters(word);
        word = getWordBySpace(word);
        String original = null;
        if (isLetters) {
            // Keep the cased original; index by the lowercased form.
            original = word;
            word = word.toLowerCase();
        }
        if (att == null) {
            // Attribute.create returns null for an unparsable natureWithFrequency string.
            return false;
        } else if (this.dat.containsKey(word)) {
            // Known word: merge the new attribute with the existing one in the static trie.
            att.original = original;
            att = DictionaryAttributeUtil.getAttribute(this.dat.get(word), att);
            this.dat.set(word, att);
            // return true;
        } else {
            // New word: goes into the dynamic BinTrie (created lazily).
            if (this.trie == null) {
                this.trie = new BinTrie();
            }
            att.original = original;
            if (this.trie.containsKey(word)) {
                att = DictionaryAttributeUtil.getAttribute(this.trie.get(word), att);
            }
            this.trie.put(word, att);
            // return true;
        }
        // Optionally mirror the word into the suggestion search trie.
        if (addToSuggesterTrie) {
            SearchService.put(word, att);
        }
        return true;
    }
}
|
||||
}
|
||||
@@ -0,0 +1,170 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;
|
||||
|
||||
import com.hankcs.hanlp.collection.trie.bintrie.BaseNode;
|
||||
import com.hankcs.hanlp.collection.trie.bintrie.BinTrie;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DimensionValueReq;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
 * Static prefix/suffix search over dictionary words, backed by two BinTries.
 *
 * <p>All state is static and mutated by dictionary load/insert/remove paths;
 * thread-safety relies on BinTrie's own behavior — confirm before concurrent use.
 */
@Slf4j
public class SearchService {

    // Hard cap on the number of results returned by a prefix/suffix search.
    public static final int SEARCH_SIZE = 200;
    // Forward trie: word -> nature strings.
    private static BinTrie<List<String>> trie;
    // Trie keyed by REVERSED words, used for suffix matching.
    private static BinTrie<List<String>> suffixTrie;

    static {
        trie = new BinTrie<>();
        suffixTrie = new BinTrie<>();
    }

    /***
     * prefix Search
     * @param key search prefix
     * @return matches (name, natures, detect word), capped at SEARCH_SIZE
     */
    public static List<HanlpMapResult> prefixSearch(String key, int limit, Set<Long> detectModelIds) {
        return prefixSearch(key, limit, trie, detectModelIds);
    }

    public static List<HanlpMapResult> prefixSearch(String key, int limit, BinTrie<List<String>> binTrie,
            Set<Long> detectModelIds) {
        Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, detectModelIds);
        return result.stream().map(
                entry -> {
                    // "#" is the stored placeholder for spaces inside dictionary words.
                    String name = entry.getKey().replace("#", " ");
                    return new HanlpMapResult(name, entry.getValue(), key);
                }
                // Sorts by name length ascending (double negation cancels out).
        ).sorted((a, b) -> -(b.getName().length() - a.getName().length()))
                .limit(SEARCH_SIZE)
                .collect(Collectors.toList());
    }

    /***
     * suffix Search
     * @param key search suffix (reversed internally to query the suffix trie)
     * @return matches with names restored to forward order
     */
    public static List<HanlpMapResult> suffixSearch(String key, int limit, Set<Long> detectModelIds) {
        String reverseDetectSegment = StringUtils.reverse(key);
        return suffixSearch(reverseDetectSegment, limit, suffixTrie, detectModelIds);
    }

    public static List<HanlpMapResult> suffixSearch(String key, int limit, BinTrie<List<String>> binTrie,
            Set<Long> detectModelIds) {
        Set<Map.Entry<String, List<String>>> result = prefixSearchLimit(key, limit, binTrie, detectModelIds);
        return result.stream().map(
                entry -> {
                    String name = entry.getKey().replace("#", " ");
                    // Strip the suffix marker from the natures before returning them.
                    List<String> natures = entry.getValue().stream()
                            .map(nature -> nature.replaceAll(DictWordType.SUFFIX.getType(), ""))
                            .collect(Collectors.toList());
                    // Stored reversed; flip back for the caller.
                    name = StringUtils.reverse(name);
                    return new HanlpMapResult(name, natures, key);
                }
        ).sorted((a, b) -> -(b.getName().length() - a.getName().length()))
                .limit(SEARCH_SIZE)
                .collect(Collectors.toList());
    }

    // Walks the trie down to the node matching `key`, then collects up to `limit`
    // entries under it (filtered by detectModelIds inside walkLimit).
    private static Set<Map.Entry<String, List<String>>> prefixSearchLimit(String key, int limit,
            BinTrie<List<String>> binTrie, Set<Long> detectModelIds) {
        key = key.toLowerCase();
        Set<Map.Entry<String, List<String>>> entrySet = new TreeSet<Map.Entry<String, List<String>>>();

        // Seed the walk buffer with key minus its last char — presumably walkLimit
        // appends each visited node's char; confirm against BaseNode.walkLimit.
        StringBuilder sb = new StringBuilder();
        if (StringUtils.isNotBlank(key)) {
            sb = new StringBuilder(key.substring(0, key.length() - 1));
        }
        BaseNode branch = binTrie;
        char[] chars = key.toCharArray();
        for (char aChar : chars) {
            if (branch == null) {
                return entrySet;
            }
            branch = branch.getChild(aChar);
        }

        if (branch == null) {
            // No node for this prefix: nothing to collect.
            return entrySet;
        }
        branch.walkLimit(sb, entrySet, limit, detectModelIds);
        return entrySet;
    }

    /** Drops both tries; called before a full dictionary reload. */
    public static void clear() {
        log.info("clear all trie");
        trie = new BinTrie<>();
        suffixTrie = new BinTrie<>();
    }

    /** Adds a word and its natures to the forward trie. */
    public static void put(String key, CoreDictionary.Attribute attribute) {
        trie.put(key, getValue(attribute.nature));
    }

    /**
     * Loads suffix words into the suffix trie, merging attributes for duplicate
     * words before insertion.
     */
    public static void loadSuffix(List<DictWord> suffixes) {
        if (CollectionUtils.isEmpty(suffixes)) {
            return;
        }
        // NOTE(review): raw TreeMap — should be TreeMap<String, CoreDictionary.Attribute>.
        TreeMap<String, CoreDictionary.Attribute> map = new TreeMap();
        for (DictWord suffix : suffixes) {
            CoreDictionary.Attribute attributeNew = suffix.getNatureWithFrequency() == null
                    ? new CoreDictionary.Attribute(Nature.nz, 1)
                    : CoreDictionary.Attribute.create(suffix.getNatureWithFrequency());
            if (map.containsKey(suffix.getWord())) {
                // Merge with the attribute accumulated for the same word.
                attributeNew = DictionaryAttributeUtil.getAttribute(map.get(suffix.getWord()), attributeNew);
            }
            map.put(suffix.getWord(), attributeNew);
        }
        for (Map.Entry<String, CoreDictionary.Attribute> stringAttributeEntry : map.entrySet()) {
            putSuffix(stringAttributeEntry.getKey(), stringAttributeEntry.getValue());
        }
    }

    /** Adds a (already reversed — see suffixSearch) word to the suffix trie. */
    public static void putSuffix(String key, CoreDictionary.Attribute attribute) {
        Nature[] nature = attribute.nature;
        suffixTrie.put(key, getValue(nature));
    }

    // Converts an array of natures into their string forms.
    private static List<String> getValue(Nature[] nature) {
        return Arrays.stream(nature).map(entry -> entry.toString()).collect(Collectors.toList());
    }

    /**
     * Removes a word from the forward trie, optionally re-inserting it with the
     * remaining natures; metric/dimension words are also dropped from the
     * suffix trie.
     */
    public static void remove(DictWord dictWord, Nature[] natures) {
        trie.remove(dictWord.getWord());
        if (Objects.nonNull(natures) && natures.length > 0) {
            // Word still carries other natures: keep it, minus the removed one.
            trie.put(dictWord.getWord(), getValue(natures));
        }
        if (dictWord.getNature().contains(DictWordType.METRIC.getType()) || dictWord.getNature()
                .contains(DictWordType.DIMENSION.getType())) {
            suffixTrie.remove(dictWord.getWord());
        }
    }

    /**
     * Returns the cached values for a dimension, keyed by the
     * "_&lt;modelId&gt;_&lt;elementId&gt;"-style nature string.
     */
    public static List<String> getDimensionValue(DimensionValueReq dimensionValueReq) {
        String nature = DictWordType.NATURE_SPILT + dimensionValueReq.getModelId() + DictWordType.NATURE_SPILT
                + dimensionValueReq.getElementID();
        PriorityQueue<Term> terms = MultiCustomDictionary.NATURE_TO_VALUES.get(nature);
        if (org.apache.commons.collections.CollectionUtils.isEmpty(terms)) {
            return new ArrayList<>();
        }
        return terms.stream().map(term -> term.getWord()).collect(Collectors.toList());
    }
}
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.ToString;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
 * Counter holder for view-related statistics.
 *
 * <p>Field semantics are inferred from the names only — confirm against the
 * code that populates this object.
 */
@Data
@ToString
@Builder
public class ViewInfoStat implements Serializable {

    private long viewCount;

    private long metricViewCount;

    private long dimensionViewCount;

    private long dimensionValueViewCount;

}
|
||||
@@ -0,0 +1,38 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.builder;
|
||||
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
/**
|
||||
* base word nature
|
||||
*/
|
||||
@Slf4j
|
||||
public abstract class BaseWordBuilder {
|
||||
|
||||
public static final Long DEFAULT_FREQUENCY = 100000L;
|
||||
|
||||
public List<DictWord> getDictWords(List<SchemaElement> schemaElements) {
|
||||
List<DictWord> dictWords = new ArrayList<>();
|
||||
try {
|
||||
dictWords = getDictWordsWithException(schemaElements);
|
||||
} catch (Exception e) {
|
||||
log.error("getWordNatureList error,", e);
|
||||
}
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
protected List<DictWord> getDictWordsWithException(List<SchemaElement> schemaElements) {
|
||||
|
||||
List<DictWord> dictWords = new ArrayList<>();
|
||||
|
||||
for (SchemaElement schemaElement : schemaElements) {
|
||||
dictWords.addAll(doGet(schemaElement.getName(), schemaElement));
|
||||
}
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
protected abstract List<DictWord> doGet(String word, SchemaElement schemaElement);
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* dimension word nature
|
||||
*/
|
||||
@Service
|
||||
public class DimensionWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Value("${nlp.dimension.use.suffix:true}")
|
||||
private boolean nlpDimensionUseSuffix = true;
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
result.add(getOnwWordNature(word, schemaElement, false));
|
||||
result.addAll(getOnwWordNatureAlias(schemaElement, false));
|
||||
if (nlpDimensionUseSuffix) {
|
||||
String reverseWord = StringUtils.reverse(word);
|
||||
if (StringUtils.isNotEmpty(word) && !word.equalsIgnoreCase(reverseWord)) {
|
||||
result.add(getOnwWordNature(reverseWord, schemaElement, true));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long viewId = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.DIMENSION.getType();
|
||||
if (isSuffix) {
|
||||
nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.SUFFIX.getType() + DictWordType.DIMENSION.getType();
|
||||
}
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
return dictWord;
|
||||
}
|
||||
|
||||
private List<DictWord> getOnwWordNatureAlias(SchemaElement schemaElement, boolean isSuffix) {
|
||||
List<DictWord> dictWords = new ArrayList<>();
|
||||
if (CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
for (String alias : schemaElement.getAlias()) {
|
||||
dictWords.add(getOnwWordNature(alias, schemaElement, false));
|
||||
}
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.builder;
|
||||
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* dimension value wordNature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class EntityWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
|
||||
if (Objects.isNull(schemaElement)) {
|
||||
return result;
|
||||
}
|
||||
|
||||
Long view = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + view + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.ENTITY.getType();
|
||||
|
||||
if (!CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
schemaElement.getAlias().stream().forEach(alias -> {
|
||||
DictWord dictWordAlias = new DictWord();
|
||||
dictWordAlias.setWord(alias);
|
||||
dictWordAlias.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY * 2, nature));
|
||||
result.add(dictWordAlias);
|
||||
});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Metric DictWord
|
||||
*/
|
||||
@Service
|
||||
public class MetricWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Value("${nlp.metric.use.suffix:true}")
|
||||
private boolean nlpMetricUseSuffix = true;
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
result.add(getOnwWordNature(word, schemaElement, false));
|
||||
result.addAll(getOnwWordNatureAlias(schemaElement, false));
|
||||
if (nlpMetricUseSuffix) {
|
||||
String reverseWord = StringUtils.reverse(word);
|
||||
if (!word.equalsIgnoreCase(reverseWord)) {
|
||||
result.add(getOnwWordNature(reverseWord, schemaElement, true));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private DictWord getOnwWordNature(String word, SchemaElement schemaElement, boolean isSuffix) {
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
Long viewId = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.METRIC.getType();
|
||||
if (isSuffix) {
|
||||
nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId()
|
||||
+ DictWordType.SUFFIX.getType() + DictWordType.METRIC.getType();
|
||||
}
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
return dictWord;
|
||||
}
|
||||
|
||||
private List<DictWord> getOnwWordNatureAlias(SchemaElement schemaElement, boolean isSuffix) {
|
||||
List<DictWord> dictWords = new ArrayList<>();
|
||||
if (CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
for (String alias : schemaElement.getAlias()) {
|
||||
dictWords.add(getOnwWordNature(alias, schemaElement, false));
|
||||
}
|
||||
return dictWords;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.builder;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* model word nature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class ModelWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
//modelName
|
||||
DictWord dictWord = buildDictWord(word, schemaElement.getView());
|
||||
result.add(dictWord);
|
||||
//alias
|
||||
List<String> aliasList = schemaElement.getAlias();
|
||||
if (CollectionUtils.isNotEmpty(aliasList)) {
|
||||
for (String alias : aliasList) {
|
||||
result.add(buildDictWord(alias, schemaElement.getView()));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private DictWord buildDictWord(String word, Long modelId) {
|
||||
DictWord dictWord = new DictWord();
|
||||
dictWord.setWord(word);
|
||||
String nature = DictWordType.NATURE_SPILT + modelId;
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
return dictWord;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.builder;
|
||||
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* dimension value wordNature
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class ValueWordBuilder extends BaseWordBuilder {
|
||||
|
||||
@Override
|
||||
public List<DictWord> doGet(String word, SchemaElement schemaElement) {
|
||||
|
||||
List<DictWord> result = Lists.newArrayList();
|
||||
if (Objects.nonNull(schemaElement) && !CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
|
||||
schemaElement.getAlias().stream().forEach(value -> {
|
||||
DictWord dictWord = new DictWord();
|
||||
Long viewId = schemaElement.getView();
|
||||
String nature = DictWordType.NATURE_SPILT + viewId + DictWordType.NATURE_SPILT + schemaElement.getId();
|
||||
dictWord.setNatureWithFrequency(String.format("%s " + DEFAULT_FREQUENCY, nature));
|
||||
dictWord.setWord(value);
|
||||
result.add(dictWord);
|
||||
});
|
||||
}
|
||||
log.debug("ValueWordBuilder, result:{}", result);
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.builder;
|
||||
|
||||
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* DictWord Strategy Factory
|
||||
*/
|
||||
public class WordBuilderFactory {
|
||||
|
||||
private static Map<DictWordType, BaseWordBuilder> wordNatures = new ConcurrentHashMap<>();
|
||||
|
||||
static {
|
||||
wordNatures.put(DictWordType.DIMENSION, new DimensionWordBuilder());
|
||||
wordNatures.put(DictWordType.METRIC, new MetricWordBuilder());
|
||||
wordNatures.put(DictWordType.VIEW, new ModelWordBuilder());
|
||||
wordNatures.put(DictWordType.ENTITY, new EntityWordBuilder());
|
||||
wordNatures.put(DictWordType.VALUE, new ValueWordBuilder());
|
||||
}
|
||||
|
||||
public static BaseWordBuilder get(DictWordType strategyType) {
|
||||
return wordNatures.get(strategyType);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.helper;
|
||||
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class FileHelper {
|
||||
|
||||
public static final String FILE_SPILT = File.separator;
|
||||
|
||||
public static void deleteCacheFile(String[] path) throws IOException {
|
||||
|
||||
String customPath = getCustomPath(path);
|
||||
File customFolder = new File(customPath);
|
||||
|
||||
File[] customSubFiles = getFileList(customFolder, ".bin");
|
||||
|
||||
for (File file : customSubFiles) {
|
||||
try {
|
||||
file.delete();
|
||||
log.info("customPath:{},delete file:{}", customPath, file);
|
||||
} catch (Exception e) {
|
||||
log.error("delete " + file, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static File[] getFileList(File customFolder, String suffix) {
|
||||
File[] customSubFiles = customFolder.listFiles(file -> {
|
||||
if (file.isDirectory()) {
|
||||
return false;
|
||||
}
|
||||
if (file.getName().toLowerCase().endsWith(suffix)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
return customSubFiles;
|
||||
}
|
||||
|
||||
private static String getCustomPath(String[] path) {
|
||||
return path[0].substring(0, path[0].lastIndexOf(FILE_SPILT)) + FILE_SPILT;
|
||||
}
|
||||
|
||||
/**
|
||||
* reset path
|
||||
*
|
||||
* @param customDictionary
|
||||
*/
|
||||
public static void resetCustomPath(DynamicCustomDictionary customDictionary) {
|
||||
String[] path = CustomDictionaryPath;
|
||||
|
||||
String customPath = getCustomPath(path);
|
||||
File customFolder = new File(customPath);
|
||||
|
||||
File[] customSubFiles = getFileList(customFolder, ".txt");
|
||||
|
||||
List<String> fileList = new ArrayList<>();
|
||||
|
||||
for (File file : customSubFiles) {
|
||||
if (file.isFile()) {
|
||||
fileList.add(file.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("CustomDictionaryPath:{}", fileList);
|
||||
CustomDictionaryPath = fileList.toArray(new String[0]);
|
||||
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
|
||||
: CustomDictionaryPath;
|
||||
if (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) {
|
||||
CustomDictionaryPath = path;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,229 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.helper;
|
||||
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
import com.hankcs.hanlp.HanLP;
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.hankcs.hanlp.dictionary.CoreDictionary;
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.seg.Segment;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.headless.core.knowledge.HadoopFileIOAdapter;
|
||||
import com.tencent.supersonic.headless.core.knowledge.MapResult;
|
||||
import com.tencent.supersonic.headless.core.knowledge.MultiCustomDictionary;
|
||||
import com.tencent.supersonic.headless.core.knowledge.SearchService;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.util.ResourceUtils;
|
||||
|
||||
/**
|
||||
* HanLP helper
|
||||
*/
|
||||
@Slf4j
|
||||
public class HanlpHelper {
|
||||
|
||||
public static final String FILE_SPILT = File.separator;
|
||||
public static final String SPACE_SPILT = "#";
|
||||
private static volatile DynamicCustomDictionary CustomDictionary;
|
||||
private static volatile Segment segment;
|
||||
|
||||
static {
    // Rewrite HanLP's relative dictionary paths once at class-load time.
    // Fail soft: a missing properties file must not abort class initialization.
    try {
        resetHanlpConfig();
    } catch (FileNotFoundException e) {
        log.error("resetHanlpConfig error", e);
    }
}
|
||||
|
||||
/**
 * Returns the shared HanLP segment, lazily created via double-checked locking
 * (the backing field is volatile). All named-entity recognizers are disabled;
 * only the custom dictionary drives matching.
 */
public static Segment getSegment() {
    if (segment == null) {
        synchronized (HanlpHelper.class) {
            if (segment == null) {
                // enableIndexMode(4) — presumably the minimal word length for index
                // mode; confirm against the HanLP Segment API.
                // NOTE(review): enableJapaneseNameRecognize(false) is called twice —
                // one of them may have been meant as a different recognizer; confirm.
                segment = HanLP.newSegment()
                        .enableIndexMode(true).enableIndexMode(4)
                        .enableCustomDictionary(true).enableCustomDictionaryForcing(true).enableOffset(true)
                        .enableJapaneseNameRecognize(false).enableNameRecognize(false)
                        .enableAllNamedEntityRecognize(false)
                        .enableJapaneseNameRecognize(false).enableNumberQuantifierRecognize(false)
                        .enablePlaceRecognize(false)
                        .enableOrganizationRecognize(false).enableCustomDictionary(getDynamicCustomDictionary());
            }
        }
    }
    return segment;
}
|
||||
|
||||
public static DynamicCustomDictionary getDynamicCustomDictionary() {
|
||||
if (CustomDictionary == null) {
|
||||
synchronized (HanlpHelper.class) {
|
||||
if (CustomDictionary == null) {
|
||||
CustomDictionary = new MultiCustomDictionary(CustomDictionaryPath);
|
||||
}
|
||||
}
|
||||
}
|
||||
return CustomDictionary;
|
||||
}
|
||||
|
||||
/***
|
||||
* reload custom dictionary
|
||||
*/
|
||||
public static boolean reloadCustomDictionary() throws IOException {
|
||||
|
||||
log.info("reloadCustomDictionary start");
|
||||
|
||||
final long startTime = System.currentTimeMillis();
|
||||
|
||||
if (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) {
|
||||
return false;
|
||||
}
|
||||
if (HanLP.Config.IOAdapter instanceof HadoopFileIOAdapter) {
|
||||
// 1.delete hdfs file
|
||||
HdfsFileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
|
||||
// 2.query txt files,update CustomDictionaryPath
|
||||
HdfsFileHelper.resetCustomPath(getDynamicCustomDictionary());
|
||||
} else {
|
||||
FileHelper.deleteCacheFile(HanLP.Config.CustomDictionaryPath);
|
||||
FileHelper.resetCustomPath(getDynamicCustomDictionary());
|
||||
}
|
||||
// 3.clear trie
|
||||
SearchService.clear();
|
||||
|
||||
boolean reload = getDynamicCustomDictionary().reload();
|
||||
log.info("reloadCustomDictionary end ,cost:{},reload:{}", System.currentTimeMillis() - startTime, reload);
|
||||
return reload;
|
||||
}
|
||||
|
||||
private static void resetHanlpConfig() throws FileNotFoundException {
|
||||
if (HanLP.Config.IOAdapter instanceof HadoopFileIOAdapter) {
|
||||
return;
|
||||
}
|
||||
String hanlpPropertiesPath = getHanlpPropertiesPath();
|
||||
|
||||
HanLP.Config.CustomDictionaryPath = Arrays.stream(HanLP.Config.CustomDictionaryPath)
|
||||
.map(path -> hanlpPropertiesPath + FILE_SPILT + path)
|
||||
.toArray(String[]::new);
|
||||
log.info("hanlpPropertiesPath:{},CustomDictionaryPath:{}", hanlpPropertiesPath,
|
||||
HanLP.Config.CustomDictionaryPath);
|
||||
|
||||
HanLP.Config.CoreDictionaryPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.BiGramDictionaryPath;
|
||||
HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath = hanlpPropertiesPath + FILE_SPILT
|
||||
+ HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath;
|
||||
HanLP.Config.BiGramDictionaryPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.BiGramDictionaryPath;
|
||||
HanLP.Config.CoreStopWordDictionaryPath =
|
||||
hanlpPropertiesPath + FILE_SPILT + HanLP.Config.CoreStopWordDictionaryPath;
|
||||
HanLP.Config.CoreSynonymDictionaryDictionaryPath = hanlpPropertiesPath + FILE_SPILT
|
||||
+ HanLP.Config.CoreSynonymDictionaryDictionaryPath;
|
||||
HanLP.Config.PersonDictionaryPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PersonDictionaryPath;
|
||||
HanLP.Config.PersonDictionaryTrPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PersonDictionaryTrPath;
|
||||
|
||||
HanLP.Config.PinyinDictionaryPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PinyinDictionaryPath;
|
||||
HanLP.Config.TranslatedPersonDictionaryPath = hanlpPropertiesPath + FILE_SPILT
|
||||
+ HanLP.Config.TranslatedPersonDictionaryPath;
|
||||
HanLP.Config.JapanesePersonDictionaryPath = hanlpPropertiesPath + FILE_SPILT
|
||||
+ HanLP.Config.JapanesePersonDictionaryPath;
|
||||
HanLP.Config.PlaceDictionaryPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PlaceDictionaryPath;
|
||||
HanLP.Config.PlaceDictionaryTrPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PlaceDictionaryTrPath;
|
||||
HanLP.Config.OrganizationDictionaryPath = hanlpPropertiesPath + FILE_SPILT
|
||||
+ HanLP.Config.OrganizationDictionaryPath;
|
||||
HanLP.Config.OrganizationDictionaryTrPath = hanlpPropertiesPath + FILE_SPILT
|
||||
+ HanLP.Config.OrganizationDictionaryTrPath;
|
||||
HanLP.Config.CharTypePath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.CharTypePath;
|
||||
HanLP.Config.CharTablePath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.CharTablePath;
|
||||
HanLP.Config.PartOfSpeechTagDictionary =
|
||||
hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PartOfSpeechTagDictionary;
|
||||
HanLP.Config.WordNatureModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.WordNatureModelPath;
|
||||
HanLP.Config.MaxEntModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.MaxEntModelPath;
|
||||
HanLP.Config.NNParserModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.NNParserModelPath;
|
||||
HanLP.Config.PerceptronParserModelPath =
|
||||
hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PerceptronParserModelPath;
|
||||
HanLP.Config.CRFSegmentModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.CRFSegmentModelPath;
|
||||
HanLP.Config.HMMSegmentModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.HMMSegmentModelPath;
|
||||
HanLP.Config.CRFCWSModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.CRFCWSModelPath;
|
||||
HanLP.Config.CRFPOSModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.CRFPOSModelPath;
|
||||
HanLP.Config.CRFNERModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.CRFNERModelPath;
|
||||
HanLP.Config.PerceptronCWSModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PerceptronCWSModelPath;
|
||||
HanLP.Config.PerceptronPOSModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PerceptronPOSModelPath;
|
||||
HanLP.Config.PerceptronNERModelPath = hanlpPropertiesPath + FILE_SPILT + HanLP.Config.PerceptronNERModelPath;
|
||||
}
|
||||
|
||||
public static String getHanlpPropertiesPath() throws FileNotFoundException {
|
||||
return ResourceUtils.getFile("classpath:hanlp.properties").getParent();
|
||||
}
|
||||
|
||||
public static boolean addToCustomDictionary(DictWord dictWord) {
|
||||
log.info("dictWord:{}", dictWord);
|
||||
return getDynamicCustomDictionary().insert(dictWord.getWord(), dictWord.getNatureWithFrequency());
|
||||
}
|
||||
|
||||
public static void removeFromCustomDictionary(DictWord dictWord) {
|
||||
log.info("dictWord:{}", dictWord);
|
||||
CoreDictionary.Attribute attribute = getDynamicCustomDictionary().get(dictWord.getWord());
|
||||
if (attribute == null) {
|
||||
return;
|
||||
}
|
||||
log.info("get attribute:{}", attribute);
|
||||
getDynamicCustomDictionary().remove(dictWord.getWord());
|
||||
StringBuilder sb = new StringBuilder();
|
||||
List<Nature> natureList = new ArrayList<>();
|
||||
for (int i = 0; i < attribute.nature.length; i++) {
|
||||
if (!attribute.nature[i].toString().equals(dictWord.getNature())) {
|
||||
sb.append(attribute.nature[i].toString() + " ");
|
||||
sb.append(attribute.frequency[i] + " ");
|
||||
natureList.add((attribute.nature[i]));
|
||||
}
|
||||
}
|
||||
String natureWithFrequency = sb.toString();
|
||||
int len = natureWithFrequency.length();
|
||||
log.info("filtered natureWithFrequency:{}", natureWithFrequency);
|
||||
if (StringUtils.isNotBlank(natureWithFrequency)) {
|
||||
getDynamicCustomDictionary().add(dictWord.getWord(), natureWithFrequency.substring(0, len - 1));
|
||||
}
|
||||
SearchService.remove(dictWord, natureList.toArray(new Nature[0]));
|
||||
}
|
||||
|
||||
public static <T extends MapResult> void transLetterOriginal(List<T> mapResults) {
|
||||
if (CollectionUtils.isEmpty(mapResults)) {
|
||||
return;
|
||||
}
|
||||
for (T mapResult : mapResults) {
|
||||
if (MultiCustomDictionary.isLowerLetter(mapResult.getName())) {
|
||||
if (CustomDictionary.contains(mapResult.getName())) {
|
||||
CoreDictionary.Attribute attribute = CustomDictionary.get(mapResult.getName());
|
||||
if (attribute != null && attribute.original != null) {
|
||||
mapResult.setName(attribute.original);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static List<com.tencent.supersonic.headless.api.pojo.response.S2Term> getTerms(String text) {
|
||||
return getSegment().seg(text.toLowerCase()).stream()
|
||||
.filter(term -> term.getNature().startsWith(DictWordType.NATURE_SPILT))
|
||||
.map(term -> transform2ApiTerm(term))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static S2Term transform2ApiTerm(Term term) {
|
||||
S2Term knowledgeTerm = new S2Term();
|
||||
BeanUtils.copyProperties(term, knowledgeTerm);
|
||||
knowledgeTerm.setFrequency(term.getFrequency());
|
||||
return knowledgeTerm;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,85 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.helper;
|
||||
|
||||
import static com.hankcs.hanlp.HanLP.Config.CustomDictionaryPath;
|
||||
|
||||
import com.hankcs.hanlp.HanLP;
|
||||
import com.hankcs.hanlp.dictionary.DynamicCustomDictionary;
|
||||
import com.hankcs.hanlp.utility.Predefine;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
||||
/**
|
||||
* Hdfs File Helper
|
||||
*/
|
||||
@Slf4j
|
||||
public class HdfsFileHelper {
|
||||
|
||||
/***
|
||||
* delete cache file
|
||||
* @param path
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void deleteCacheFile(String[] path) throws IOException {
|
||||
FileSystem fs = FileSystem.get(URI.create(path[0]), new Configuration());
|
||||
String cacheFilePath = path[0] + Predefine.BIN_EXT;
|
||||
log.info("delete cache file:{}", cacheFilePath);
|
||||
try {
|
||||
fs.delete(new Path(cacheFilePath), false);
|
||||
} catch (Exception e) {
|
||||
log.error("delete:" + cacheFilePath, e);
|
||||
}
|
||||
int customBase = cacheFilePath.lastIndexOf(FileHelper.FILE_SPILT);
|
||||
String customPath = cacheFilePath.substring(0, customBase) + FileHelper.FILE_SPILT + "*.bin";
|
||||
List<String> fileList = getFileList(fs, new Path(customPath));
|
||||
for (String file : fileList) {
|
||||
try {
|
||||
fs.delete(new Path(file), false);
|
||||
log.info("delete cache file:{}", file);
|
||||
} catch (Exception e) {
|
||||
log.error("delete " + file, e);
|
||||
}
|
||||
}
|
||||
log.info("fileList:{}", fileList);
|
||||
}
|
||||
|
||||
/**
|
||||
* reset path
|
||||
*
|
||||
* @param customDictionary
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void resetCustomPath(DynamicCustomDictionary customDictionary) throws IOException {
|
||||
String[] path = HanLP.Config.CustomDictionaryPath;
|
||||
FileSystem fs = FileSystem.get(URI.create(path[0]), new Configuration());
|
||||
String cacheFilePath = path[0] + Predefine.BIN_EXT;
|
||||
int customBase = cacheFilePath.lastIndexOf(FileHelper.FILE_SPILT);
|
||||
String customPath = cacheFilePath.substring(0, customBase) + FileHelper.FILE_SPILT + "*.txt";
|
||||
log.info("customPath:{}", customPath);
|
||||
List<String> fileList = getFileList(fs, new Path(customPath));
|
||||
log.info("CustomDictionaryPath:{}", fileList);
|
||||
CustomDictionaryPath = fileList.toArray(new String[0]);
|
||||
customDictionary.path = (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) ? path
|
||||
: CustomDictionaryPath;
|
||||
if (CustomDictionaryPath == null || CustomDictionaryPath.length == 0) {
|
||||
CustomDictionaryPath = path;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<String> getFileList(FileSystem fs, Path folderPath) throws IOException {
|
||||
List<String> paths = new ArrayList();
|
||||
FileStatus[] fileStatuses = fs.globStatus(folderPath);
|
||||
for (int i = 0; i < fileStatuses.length; i++) {
|
||||
FileStatus fileStatus = fileStatuses[i];
|
||||
paths.add(fileStatus.getPath().toString());
|
||||
}
|
||||
return paths;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,180 @@
|
||||
package com.tencent.supersonic.headless.core.knowledge.helper;
|
||||
|
||||
import com.hankcs.hanlp.corpus.tag.Nature;
|
||||
import com.tencent.supersonic.headless.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.core.knowledge.ViewInfoStat;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* nature parse helper
|
||||
*/
|
||||
@Slf4j
|
||||
public class NatureHelper {
|
||||
|
||||
public static SchemaElementType convertToElementType(String nature) {
|
||||
DictWordType dictWordType = DictWordType.getNatureType(nature);
|
||||
if (Objects.isNull(dictWordType)) {
|
||||
return null;
|
||||
}
|
||||
SchemaElementType result = null;
|
||||
switch (dictWordType) {
|
||||
case METRIC:
|
||||
result = SchemaElementType.METRIC;
|
||||
break;
|
||||
case DIMENSION:
|
||||
result = SchemaElementType.DIMENSION;
|
||||
break;
|
||||
case ENTITY:
|
||||
result = SchemaElementType.ENTITY;
|
||||
break;
|
||||
case VIEW:
|
||||
result = SchemaElementType.VIEW;
|
||||
break;
|
||||
case VALUE:
|
||||
result = SchemaElementType.VALUE;
|
||||
break;
|
||||
case TAG:
|
||||
result = SchemaElementType.TAG;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private static boolean isViewOrEntity(S2Term term, Integer model) {
|
||||
return (DictWordType.NATURE_SPILT + model).equals(term.nature.toString()) || term.nature.toString()
|
||||
.endsWith(DictWordType.ENTITY.getType());
|
||||
}
|
||||
|
||||
public static Integer getViewByNature(Nature nature) {
|
||||
if (nature.startsWith(DictWordType.NATURE_SPILT)) {
|
||||
String[] dimensionValues = nature.toString().split(DictWordType.NATURE_SPILT);
|
||||
if (StringUtils.isNumeric(dimensionValues[1])) {
|
||||
return Integer.valueOf(dimensionValues[1]);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
public static Long getViewId(String nature) {
|
||||
try {
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length <= 1) {
|
||||
return null;
|
||||
}
|
||||
return Long.valueOf(split[1]);
|
||||
} catch (NumberFormatException e) {
|
||||
log.error("", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static boolean isDimensionValueViewId(String nature) {
|
||||
if (StringUtils.isEmpty(nature)) {
|
||||
return false;
|
||||
}
|
||||
if (!nature.startsWith(DictWordType.NATURE_SPILT)) {
|
||||
return false;
|
||||
}
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length <= 1) {
|
||||
return false;
|
||||
}
|
||||
return !nature.endsWith(DictWordType.METRIC.getType()) && !nature.endsWith(DictWordType.DIMENSION.getType())
|
||||
&& StringUtils.isNumeric(split[1]);
|
||||
}
|
||||
|
||||
public static ViewInfoStat getViewStat(List<S2Term> terms) {
|
||||
return ViewInfoStat.builder()
|
||||
.viewCount(getViewCount(terms))
|
||||
.dimensionViewCount(getDimensionCount(terms))
|
||||
.metricViewCount(getMetricCount(terms))
|
||||
.dimensionValueViewCount(getDimensionValueCount(terms))
|
||||
.build();
|
||||
}
|
||||
|
||||
private static long getViewCount(List<S2Term> terms) {
|
||||
return terms.stream().filter(term -> isViewOrEntity(term, getViewByNature(term.nature))).count();
|
||||
}
|
||||
|
||||
private static long getDimensionValueCount(List<S2Term> terms) {
|
||||
return terms.stream().filter(term -> isDimensionValueViewId(term.nature.toString())).count();
|
||||
}
|
||||
|
||||
private static long getDimensionCount(List<S2Term> terms) {
|
||||
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
||||
.endsWith(DictWordType.DIMENSION.getType())).count();
|
||||
}
|
||||
|
||||
private static long getMetricCount(List<S2Term> terms) {
|
||||
return terms.stream().filter(term -> term.nature.startsWith(DictWordType.NATURE_SPILT) && term.nature.toString()
|
||||
.endsWith(DictWordType.METRIC.getType())).count();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of types of class parts of speech
|
||||
* modelId -> (nature , natureCount)
|
||||
*
|
||||
* @param terms
|
||||
* @return
|
||||
*/
|
||||
public static Map<Long, Map<DictWordType, Integer>> getViewToNatureStat(List<S2Term> terms) {
|
||||
Map<Long, Map<DictWordType, Integer>> modelToNature = new HashMap<>();
|
||||
terms.stream().filter(
|
||||
term -> term.nature.startsWith(DictWordType.NATURE_SPILT)
|
||||
).forEach(term -> {
|
||||
DictWordType dictWordType = DictWordType.getNatureType(String.valueOf(term.nature));
|
||||
Long model = getViewId(String.valueOf(term.nature));
|
||||
|
||||
Map<DictWordType, Integer> natureTypeMap = new HashMap<>();
|
||||
natureTypeMap.put(dictWordType, 1);
|
||||
|
||||
Map<DictWordType, Integer> original = modelToNature.get(model);
|
||||
if (Objects.isNull(original)) {
|
||||
modelToNature.put(model, natureTypeMap);
|
||||
} else {
|
||||
Integer count = original.get(dictWordType);
|
||||
if (Objects.isNull(count)) {
|
||||
count = 1;
|
||||
} else {
|
||||
count = count + 1;
|
||||
}
|
||||
original.put(dictWordType, count);
|
||||
}
|
||||
});
|
||||
return modelToNature;
|
||||
}
|
||||
|
||||
public static List<Long> selectPossibleViews(List<S2Term> terms) {
|
||||
Map<Long, Map<DictWordType, Integer>> modelToNatureStat = getViewToNatureStat(terms);
|
||||
Integer maxViewTypeSize = modelToNatureStat.entrySet().stream()
|
||||
.max(Comparator.comparingInt(o -> o.getValue().size())).map(entry -> entry.getValue().size())
|
||||
.orElse(null);
|
||||
if (Objects.isNull(maxViewTypeSize) || maxViewTypeSize == 0) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
return modelToNatureStat.entrySet().stream().filter(entry -> entry.getValue().size() == maxViewTypeSize)
|
||||
.map(entry -> entry.getKey()).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public static Long getElementID(String nature) {
|
||||
String[] split = nature.split(DictWordType.NATURE_SPILT);
|
||||
if (split.length >= 3) {
|
||||
return Long.valueOf(split[2]);
|
||||
}
|
||||
return 0L;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
com.tencent.supersonic.headless.core.file.FileHandler=\
|
||||
com.tencent.supersonic.headless.core.file.FileHandlerImpl
|
||||
@@ -17,9 +17,6 @@
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
|
||||
|
||||
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
@@ -115,6 +112,8 @@
|
||||
<artifactId>postgresql</artifactId>
|
||||
<version>${postgresql.version}</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.tencent.supersonic.headless.server.persistence.dataobject;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
/**
 * Data object mapped to table {@code s2_dictionary_conf}: per-item dictionary
 * import configuration. Accessors, equals/hashCode and toString are generated
 * by Lombok {@code @Data}.
 */
@Data
@TableName("s2_dictionary_conf")
public class DictConfDO {
    // Auto-increment primary key.
    @TableId(type = IdType.AUTO)
    private Long id;
    private String description;
    // Item type name; presumably an enum name string — see usage in DictRepositoryImpl.
    private String type;
    // Id of the item (e.g. dimension) the configuration applies to.
    private Long itemId;
    // Serialized configuration payload.
    private String config;
    // Status name string (e.g. ONLINE) — see DictItemFilter usage.
    private String status;
    private Date createdAt;
    private String createdBy;
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package com.tencent.supersonic.headless.server.persistence.dataobject;
|
||||
|
||||
import com.baomidou.mybatisplus.annotation.IdType;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
/**
 * Data object mapped to table {@code s2_dictionary_task}: one dictionary
 * import/delete task execution record. Accessors, equals/hashCode and toString
 * are generated by Lombok {@code @Data}.
 */
@Data
@TableName("s2_dictionary_task")
public class DictTaskDO {
    // Auto-increment primary key.
    @TableId(type = IdType.AUTO)
    private Long id;
    private String name;
    private String description;
    // Item type name the task targets.
    private String type;
    // Id of the item whose dictionary values the task handles.
    private Long itemId;
    // Serialized task configuration payload.
    private String config;
    // Task status name string.
    private String status;
    private Date createdAt;
    private String createdBy;
    // Task duration in milliseconds.
    private Long elapsedMs;

}
|
||||
@@ -0,0 +1,9 @@
|
||||
package com.tencent.supersonic.headless.server.persistence.mapper;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
/** MyBatis-Plus mapper for {@link DictConfDO}; inherits standard CRUD from {@link BaseMapper}. */
@Mapper
public interface DictConfMapper extends BaseMapper<DictConfDO> {
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.tencent.supersonic.headless.server.persistence.mapper;
|
||||
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
|
||||
import org.apache.ibatis.annotations.Mapper;
|
||||
|
||||
/** MyBatis-Plus mapper for {@link DictTaskDO}; inherits standard CRUD from {@link BaseMapper}. */
@Mapper
public interface DictTaskMapper extends BaseMapper<DictTaskDO> {

}
|
||||
@@ -0,0 +1,29 @@
|
||||
package com.tencent.supersonic.headless.server.persistence.repository;
|
||||
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Persistence boundary for dictionary configuration and dictionary task records.
 */
public interface DictRepository {

    /** Persists a new configuration row and returns its generated id. */
    Long addDictConf(DictConfDO dictConfDO);

    /** Updates an existing configuration row and returns its id. */
    Long editDictConf(DictConfDO dictConfDO);

    /** Queries configurations matching the (all-optional) filter fields. */
    List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter);

    /** Persists a new task row and returns its generated id. */
    Long addDictTask(DictTaskDO dictTaskDO);

    /** Updates an existing task row and returns its id. */
    Long editDictTask(DictTaskDO dictTaskDO);

    /** Returns one task matching the filter, or null when none match. */
    DictTaskDO queryDictTask(DictItemFilter filter);

    /** Looks a task up by primary key. */
    DictTaskDO queryDictTaskById(Long id);

    /** Returns the most recent task for the given item/type, enriched with dimension info. */
    DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq);
}
|
||||
@@ -0,0 +1,142 @@
|
||||
package com.tencent.supersonic.headless.server.persistence.repository.impl;
|
||||
|
||||
|
||||
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
|
||||
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DimensionResp;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
|
||||
import com.tencent.supersonic.headless.server.persistence.mapper.DictConfMapper;
|
||||
import com.tencent.supersonic.headless.server.persistence.mapper.DictTaskMapper;
|
||||
import com.tencent.supersonic.headless.server.persistence.repository.DictRepository;
|
||||
import com.tencent.supersonic.headless.server.service.DimensionService;
|
||||
import com.tencent.supersonic.headless.server.utils.DictUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Repository;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
@Slf4j
|
||||
@Repository
|
||||
public class DictRepositoryImpl implements DictRepository {
|
||||
|
||||
private final DictTaskMapper dictTaskMapper;
|
||||
private final DictConfMapper dictConfMapper;
|
||||
private final DictUtils dictConverter;
|
||||
private final DimensionService dimensionService;
|
||||
|
||||
public DictRepositoryImpl(DictTaskMapper dictTaskMapper, DictConfMapper dictConfMapper,
|
||||
DictUtils dictConverter, DimensionService dimensionService) {
|
||||
this.dictTaskMapper = dictTaskMapper;
|
||||
this.dictConfMapper = dictConfMapper;
|
||||
this.dictConverter = dictConverter;
|
||||
this.dimensionService = dimensionService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long addDictTask(DictTaskDO dictTaskDO) {
|
||||
dictTaskMapper.insert(dictTaskDO);
|
||||
return dictTaskDO.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long editDictTask(DictTaskDO dictTaskDO) {
|
||||
dictTaskMapper.updateById(dictTaskDO);
|
||||
return dictTaskDO.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictTaskDO queryDictTask(DictItemFilter filter) {
|
||||
QueryWrapper<DictTaskDO> wrapper = new QueryWrapper<>();
|
||||
if (Objects.nonNull(filter.getItemId())) {
|
||||
wrapper.lambda().eq(DictTaskDO::getItemId, filter.getItemId());
|
||||
}
|
||||
if (Objects.nonNull(filter.getType())) {
|
||||
wrapper.lambda().eq(DictTaskDO::getType, filter.getType());
|
||||
}
|
||||
if (Objects.nonNull(filter.getId())) {
|
||||
wrapper.lambda().eq(DictTaskDO::getId, filter.getId());
|
||||
}
|
||||
|
||||
List<DictTaskDO> dictTaskDOList = dictTaskMapper.selectList(wrapper);
|
||||
if (CollectionUtils.isEmpty(dictTaskDOList)) {
|
||||
return null;
|
||||
}
|
||||
return dictTaskDOList.get(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictTaskDO queryDictTaskById(Long id) {
|
||||
return dictTaskMapper.selectById(id);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq) {
|
||||
DictTaskResp taskResp = new DictTaskResp();
|
||||
QueryWrapper<DictTaskDO> wrapper = new QueryWrapper<>();
|
||||
wrapper.lambda().eq(DictTaskDO::getItemId, taskReq.getItemId());
|
||||
wrapper.lambda().eq(DictTaskDO::getType, taskReq.getType());
|
||||
List<DictTaskDO> dictTaskDOList = dictTaskMapper.selectList(wrapper);
|
||||
if (CollectionUtils.isEmpty(dictTaskDOList)) {
|
||||
return taskResp;
|
||||
}
|
||||
taskResp = dictConverter.taskDO2Resp(dictTaskDOList.get(0));
|
||||
DimensionResp dimension = dimensionService.getDimension(taskReq.getItemId());
|
||||
taskResp.setBizName(dimension.getBizName());
|
||||
taskResp.setModelId(dimension.getModelId());
|
||||
return taskResp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long addDictConf(DictConfDO dictConfDO) {
|
||||
dictConfMapper.insert(dictConfDO);
|
||||
return dictConfDO.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long editDictConf(DictConfDO dictConfDO) {
|
||||
DictItemFilter filter = DictItemFilter.builder().type(TypeEnums.valueOf(dictConfDO.getType()))
|
||||
.itemId(dictConfDO.getItemId())
|
||||
.status(StatusEnum.ONLINE)
|
||||
.build();
|
||||
|
||||
List<DictConfDO> dictConfDOList = getDictConfDOList(filter);
|
||||
if (CollectionUtils.isEmpty(dictConfDOList)) {
|
||||
return -1L;
|
||||
}
|
||||
dictConfDO.setId(dictConfDOList.get(0).getId());
|
||||
dictConfMapper.updateById(dictConfDO);
|
||||
return dictConfDO.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter) {
|
||||
List<DictConfDO> dictConfDOList = getDictConfDOList(dictItemFilter);
|
||||
return dictConverter.dictDOList2Req(dictConfDOList);
|
||||
}
|
||||
|
||||
private List<DictConfDO> getDictConfDOList(DictItemFilter dictItemFilter) {
|
||||
QueryWrapper<DictConfDO> wrapper = new QueryWrapper<>();
|
||||
if (Objects.nonNull(dictItemFilter.getId())) {
|
||||
wrapper.lambda().eq(DictConfDO::getId, dictItemFilter.getId());
|
||||
}
|
||||
if (Objects.nonNull(dictItemFilter.getType())) {
|
||||
wrapper.lambda().eq(DictConfDO::getType, dictItemFilter.getType().name());
|
||||
}
|
||||
if (Objects.nonNull(dictItemFilter.getItemId())) {
|
||||
wrapper.lambda().eq(DictConfDO::getItemId, dictItemFilter.getItemId());
|
||||
}
|
||||
if (Objects.nonNull(dictItemFilter.getStatus())) {
|
||||
wrapper.lambda().eq(DictConfDO::getStatus, dictItemFilter.getStatus().name());
|
||||
}
|
||||
List<DictConfDO> dictConfDOList = dictConfMapper.selectList(wrapper);
|
||||
return dictConfDOList;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,132 @@
|
||||
package com.tencent.supersonic.headless.server.rest;
|
||||
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.auth.api.authentication.utils.UserHolder;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import javax.validation.Valid;
|
||||
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
|
||||
import com.tencent.supersonic.headless.server.service.DictConfService;
|
||||
import com.tencent.supersonic.headless.server.service.DictTaskService;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.PutMapping;
|
||||
import org.springframework.web.bind.annotation.RequestBody;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/semantic/knowledge")
|
||||
public class KnowledgeController {
|
||||
|
||||
@Autowired
|
||||
private DictTaskService taskService;
|
||||
|
||||
@Autowired
|
||||
private DictConfService confService;
|
||||
|
||||
/**
|
||||
* addDictConf-新增item的字典配置
|
||||
* Add configuration information for dictionary entries
|
||||
*
|
||||
* @param dictItemReq
|
||||
*/
|
||||
@PostMapping("/conf")
|
||||
public Long addDictConf(@RequestBody @Valid DictItemReq dictItemReq,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response) {
|
||||
User user = UserHolder.findUser(request, response);
|
||||
return confService.addDictConf(dictItemReq, user);
|
||||
}
|
||||
|
||||
/**
|
||||
* editDictConf-编辑item的字典配置
|
||||
* Edit configuration information for dictionary entries
|
||||
*
|
||||
* @param dictItemReq
|
||||
*/
|
||||
@PutMapping("/conf")
|
||||
public Long editDictConf(@RequestBody @Valid DictItemReq dictItemReq,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response) {
|
||||
User user = UserHolder.findUser(request, response);
|
||||
return confService.editDictConf(dictItemReq, user);
|
||||
}
|
||||
|
||||
/**
|
||||
* queryDictConf-查询item的字典配置
|
||||
* query configuration information for dictionary entries
|
||||
*
|
||||
* @param filter
|
||||
*/
|
||||
@PostMapping("/conf/query")
|
||||
public List<DictItemResp> queryDictConf(@RequestBody @Valid DictItemFilter filter,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response) {
|
||||
User user = UserHolder.findUser(request, response);
|
||||
return confService.queryDictConf(filter, user);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* addDictTask-实时导入一个item的字典数据
|
||||
* write specific item values to the knowledge base
|
||||
*
|
||||
* @param taskReq
|
||||
*/
|
||||
@PostMapping("/task")
|
||||
public Long addDictTask(@RequestBody DictSingleTaskReq taskReq,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response) {
|
||||
User user = UserHolder.findUser(request, response);
|
||||
return taskService.addDictTask(taskReq, user);
|
||||
}
|
||||
|
||||
/**
|
||||
* deleteDictTask-实时删除某一个item的字典数据
|
||||
* remove specific item values from the knowledge base
|
||||
*
|
||||
* @param taskReq
|
||||
*/
|
||||
@PutMapping("/task/delete")
|
||||
public Long deleteDictTask(@RequestBody DictSingleTaskReq taskReq,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response) {
|
||||
User user = UserHolder.findUser(request, response);
|
||||
return taskService.deleteDictTask(taskReq, user);
|
||||
}
|
||||
|
||||
/**
|
||||
* dailyDictTask-手动离线更新所有字典
|
||||
*/
|
||||
@PutMapping("/task/all")
|
||||
public Boolean dailyDictTask(
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response) {
|
||||
return taskService.dailyDictTask();
|
||||
}
|
||||
|
||||
/**
|
||||
* queryLatestDictTask-返回最新的字典任务执行情况
|
||||
*
|
||||
* @param taskReq
|
||||
*/
|
||||
@PostMapping("/task/search")
|
||||
public DictTaskResp queryLatestDictTask(@RequestBody DictSingleTaskReq taskReq,
|
||||
HttpServletRequest request,
|
||||
HttpServletResponse response) {
|
||||
User user = UserHolder.findUser(request, response);
|
||||
return taskService.queryLatestDictTask(taskReq, user);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
package com.tencent.supersonic.headless.server.service;
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Make relevant settings for the dictionary
 */
public interface DictConfService {

    /**
     * Persists a new dictionary configuration for an item.
     *
     * @param itemValueReq configuration to store
     * @param user         acting user, recorded as the creator
     * @return identifier of the stored configuration
     */
    Long addDictConf(DictItemReq itemValueReq, User user);

    /**
     * Updates an existing dictionary configuration for an item.
     *
     * @param itemValueReq configuration carrying the item id and new values
     * @param user         acting user
     * @return the item id of the edited configuration
     */
    Long editDictConf(DictItemReq itemValueReq, User user);

    /**
     * Queries dictionary configurations matching the given filter.
     *
     * @param dictItemFilter filter conditions (item id, type, status, ...)
     * @param user           acting user
     * @return matching configurations; empty list when nothing matches
     */
    List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter, User user);
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package com.tencent.supersonic.headless.server.service;
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
|
||||
|
||||
/**
 * Manage dictionary tasks
 */
public interface DictTaskService {

    /**
     * Builds and runs a dictionary-refresh task for a single item.
     *
     * @param taskReq item identifier and type
     * @param user    acting user, recorded on the task
     * @return id of the created task record
     */
    Long addDictTask(DictSingleTaskReq taskReq, User user);

    /**
     * Removes a single item's dictionary data and refreshes the knowledge base.
     *
     * @param taskReq item identifier and type
     * @param user    acting user
     * @return result code (0 on completion)
     */
    Long deleteDictTask(DictSingleTaskReq taskReq, User user);

    /**
     * Refreshes the dictionaries of all online items (scheduled daily job).
     *
     * @return whether the refresh was executed
     */
    Boolean dailyDictTask();

    /**
     * Returns the most recent dictionary task for an item.
     *
     * @param taskReq item identifier and type
     * @param user    acting user
     * @return latest task record
     */
    DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq, User user);
}
|
||||
@@ -0,0 +1,25 @@
|
||||
package com.tencent.supersonic.headless.server.service;
|
||||
|
||||
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.headless.core.knowledge.HanlpMapResult;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
 * Access to the in-memory dictionary knowledge (segmentation terms, prefix and
 * suffix search) and to its reload/update operations.
 */
public interface KnowledgeService {

    /**
     * Segments the given text into dictionary terms.
     *
     * @param text raw input text
     * @return recognized terms
     */
    List<S2Term> getTerms(String text);

    /**
     * Prefix-matches dictionary entries visible to the given views.
     *
     * @param key     search prefix
     * @param limit   maximum number of results
     * @param viewIds views whose underlying models restrict the search scope
     * @return matching entries
     */
    List<HanlpMapResult> prefixSearch(String key, int limit, Set<Long> viewIds);

    /**
     * Suffix-matches dictionary entries visible to the given views.
     *
     * @param key     search suffix
     * @param limit   maximum number of results
     * @param viewIds views whose underlying models restrict the search scope
     * @return matching entries
     */
    List<HanlpMapResult> suffixSearch(String key, int limit, Set<Long> viewIds);

    /**
     * Adds the given dictionary words to the running knowledge base.
     *
     * @param natures words with nature/frequency annotations
     */
    void updateSemanticKnowledge(List<DictWord> natures);

    /**
     * Reloads the custom dictionary from disk, then applies the given words.
     *
     * @param natures words with nature/frequency annotations
     */
    void reloadAllData(List<DictWord> natures);

    /**
     * Applies the given words to the online knowledge base, swallowing errors.
     *
     * @param natures words with nature/frequency annotations
     */
    void updateOnlineKnowledge(List<DictWord> natures);

}
|
||||
@@ -0,0 +1,50 @@
|
||||
package com.tencent.supersonic.headless.server.service.impl;
|
||||
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
|
||||
import com.tencent.supersonic.headless.server.persistence.repository.DictRepository;
|
||||
import com.tencent.supersonic.headless.server.service.DictConfService;
|
||||
import com.tencent.supersonic.headless.server.utils.DictUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Service
|
||||
public class DictConfServiceImpl implements DictConfService {
|
||||
|
||||
private final DictRepository dictRepository;
|
||||
private final DictUtils dictConverter;
|
||||
|
||||
public DictConfServiceImpl(DictRepository dictRepository,
|
||||
DictUtils dictConverter) {
|
||||
this.dictRepository = dictRepository;
|
||||
this.dictConverter = dictConverter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long addDictConf(DictItemReq itemValueReq, User user) {
|
||||
DictConfDO dictConfDO = dictConverter.generateDictConfDO(itemValueReq, user);
|
||||
return dictRepository.addDictConf(dictConfDO);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long editDictConf(DictItemReq itemValueReq, User user) {
|
||||
DictConfDO dictConfDO = dictConverter.generateDictConfDO(itemValueReq, user);
|
||||
dictRepository.editDictConf(dictConfDO);
|
||||
if (StatusEnum.DELETED.equals(itemValueReq.getStatus())) {
|
||||
// todo delete dict file and refresh
|
||||
|
||||
}
|
||||
return itemValueReq.getItemId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DictItemResp> queryDictConf(DictItemFilter dictItemFilter, User user) {
|
||||
return dictRepository.queryDictConf(dictItemFilter);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
package com.tencent.supersonic.headless.server.service.impl;
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
|
||||
import com.tencent.supersonic.headless.core.file.FileHandler;
|
||||
import com.tencent.supersonic.headless.core.knowledge.helper.HanlpHelper;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
|
||||
import com.tencent.supersonic.headless.server.persistence.repository.DictRepository;
|
||||
import com.tencent.supersonic.headless.server.service.DictTaskService;
|
||||
import com.tencent.supersonic.headless.server.service.KnowledgeService;
|
||||
import com.tencent.supersonic.headless.server.utils.DictUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class DictTaskServiceImpl implements DictTaskService {
|
||||
|
||||
@Value("${dict.flush.enable:true}")
|
||||
private Boolean dictFlushEnable;
|
||||
@Value("${dict.flush.daily.enable:true}")
|
||||
private Boolean dictFlushDailyEnable;
|
||||
@Value("${dict.file.type:txt}")
|
||||
private String dictFileType;
|
||||
private String dimValue = "DimValue_%d_%d";
|
||||
|
||||
private final DictRepository dictRepository;
|
||||
private final DictUtils dictConverter;
|
||||
private final DictUtils dictUtils;
|
||||
private final FileHandler fileHandler;
|
||||
private final KnowledgeService knowledgeService;
|
||||
|
||||
public DictTaskServiceImpl(DictRepository dictRepository,
|
||||
DictUtils dictConverter,
|
||||
DictUtils dictUtils,
|
||||
FileHandler fileHandler,
|
||||
KnowledgeService knowledgeService) {
|
||||
this.dictRepository = dictRepository;
|
||||
this.dictConverter = dictConverter;
|
||||
this.dictUtils = dictUtils;
|
||||
this.fileHandler = fileHandler;
|
||||
this.knowledgeService = knowledgeService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long addDictTask(DictSingleTaskReq taskReq, User user) {
|
||||
if (!dictFlushEnable) {
|
||||
return 0L;
|
||||
}
|
||||
DictItemResp dictItemResp = fetchDictItemResp(taskReq);
|
||||
return handleDictTaskByItemResp(dictItemResp, user);
|
||||
}
|
||||
|
||||
private Long handleDictTaskByItemResp(DictItemResp dictItemResp, User user) {
|
||||
DictTaskDO dictTaskDO = dictConverter.generateDictTaskDO(dictItemResp, user);
|
||||
log.info("[addDictTask] dictTaskDO:{}", dictTaskDO);
|
||||
dictRepository.addDictTask(dictTaskDO);
|
||||
Long idInDb = dictTaskDO.getId();
|
||||
dictItemResp.setId(idInDb);
|
||||
runDictTask(dictItemResp, user);
|
||||
return idInDb;
|
||||
}
|
||||
|
||||
private DictItemResp fetchDictItemResp(DictSingleTaskReq taskReq) {
|
||||
DictItemFilter dictItemFilter = DictItemFilter.builder()
|
||||
.itemId(taskReq.getItemId())
|
||||
.type(taskReq.getType())
|
||||
.build();
|
||||
List<DictItemResp> dictItemRespList = dictRepository.queryDictConf(dictItemFilter);
|
||||
if (!CollectionUtils.isEmpty(dictItemRespList)) {
|
||||
return dictItemRespList.get(0);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private void runDictTask(DictItemResp dictItemResp, User user) {
|
||||
if (Objects.isNull(dictItemResp)) {
|
||||
return;
|
||||
}
|
||||
|
||||
DictTaskDO dictTaskDO = dictRepository.queryDictTaskById(dictItemResp.getId());
|
||||
dictTaskDO.setStatus(TaskStatusEnum.RUNNING.getStatus());
|
||||
dictRepository.editDictTask(dictTaskDO);
|
||||
|
||||
// 1.生成item字典数据
|
||||
List<String> data = dictUtils.fetchItemValue(dictItemResp);
|
||||
|
||||
// 2.变更字典文件
|
||||
String fileName = dictItemResp.fetchDictFileName() + Constants.DOT + dictFileType;
|
||||
fileHandler.writeFile(data, fileName, false);
|
||||
|
||||
// 3.实时变更内存中字典数据
|
||||
try {
|
||||
HanlpHelper.reloadCustomDictionary();
|
||||
dictTaskDO.setStatus(TaskStatusEnum.SUCCESS.getStatus());
|
||||
dictRepository.editDictTask(dictTaskDO);
|
||||
} catch (IOException e) {
|
||||
log.error("reloadCustomDictionary error", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long deleteDictTask(DictSingleTaskReq taskReq, User user) {
|
||||
DictItemResp dictItemResp = fetchDictItemResp(taskReq);
|
||||
String fileName = dictItemResp.fetchDictFileName() + Constants.DOT + dictFileType;
|
||||
fileHandler.deleteDictFile(fileName);
|
||||
|
||||
try {
|
||||
HanlpHelper.reloadCustomDictionary();
|
||||
} catch (Exception e) {
|
||||
log.error("reloadCustomDictionary error", e);
|
||||
}
|
||||
|
||||
return 0L;
|
||||
}
|
||||
|
||||
@Override
|
||||
@Scheduled(cron = "${knowledge.dimension.value.cron:0 0 0 * * ?}")
|
||||
public Boolean dailyDictTask() {
|
||||
log.info("[dailyDictTask] start");
|
||||
if (!dictFlushDailyEnable) {
|
||||
log.info("dictFlushDailyEnable is false, now finish dailyDictTask");
|
||||
}
|
||||
DictItemFilter filter = DictItemFilter.builder().status(StatusEnum.ONLINE).build();
|
||||
List<DictItemResp> dictItemRespList = dictRepository.queryDictConf(filter);
|
||||
dictItemRespList.stream().forEach(item -> handleDictTaskByItemResp(item, null));
|
||||
log.info("[dailyDictTask] finish");
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq, User user) {
|
||||
return dictRepository.queryLatestDictTask(taskReq);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,107 @@
|
||||
package com.tencent.supersonic.headless.server.service.impl;
|
||||
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.S2Term;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.ViewResp;
|
||||
import com.tencent.supersonic.headless.core.knowledge.DictWord;
|
||||
import com.tencent.supersonic.headless.core.knowledge.HanlpMapResult;
|
||||
import com.tencent.supersonic.headless.core.knowledge.SearchService;
|
||||
import com.tencent.supersonic.headless.core.knowledge.helper.HanlpHelper;
|
||||
import com.tencent.supersonic.headless.server.pojo.MetaFilter;
|
||||
import com.tencent.supersonic.headless.server.service.KnowledgeService;
|
||||
import com.tencent.supersonic.headless.server.service.ViewService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public class KnowledgeServiceImpl implements KnowledgeService {
|
||||
|
||||
private final ViewService viewService;
|
||||
|
||||
public KnowledgeServiceImpl(ViewService viewService) {
|
||||
this.viewService = viewService;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateSemanticKnowledge(List<DictWord> natures) {
|
||||
|
||||
List<DictWord> prefixes = natures.stream()
|
||||
.filter(entry -> !entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
for (DictWord nature : prefixes) {
|
||||
HanlpHelper.addToCustomDictionary(nature);
|
||||
}
|
||||
|
||||
List<DictWord> suffixes = natures.stream()
|
||||
.filter(entry -> entry.getNatureWithFrequency().contains(DictWordType.SUFFIX.getType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
SearchService.loadSuffix(suffixes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reloadAllData(List<DictWord> natures) {
|
||||
// 1. reload custom knowledge
|
||||
try {
|
||||
HanlpHelper.reloadCustomDictionary();
|
||||
} catch (Exception e) {
|
||||
log.error("reloadCustomDictionary error", e);
|
||||
}
|
||||
|
||||
// 2. update online knowledge
|
||||
updateOnlineKnowledge(natures);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateOnlineKnowledge(List<DictWord> natures) {
|
||||
try {
|
||||
updateSemanticKnowledge(natures);
|
||||
} catch (Exception e) {
|
||||
log.error("updateSemanticKnowledge error", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<S2Term> getTerms(String text) {
|
||||
return HanlpHelper.getTerms(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HanlpMapResult> prefixSearch(String key, int limit, Set<Long> viewIds) {
|
||||
return prefixSearch(key, limit, viewIds2ModelIdList(viewIds));
|
||||
}
|
||||
|
||||
public List<HanlpMapResult> prefixSearchByModel(String key, int limit, Set<Long> models) {
|
||||
return SearchService.prefixSearch(key, limit, models);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<HanlpMapResult> suffixSearch(String key, int limit, Set<Long> viewIds) {
|
||||
return suffixSearch(key, limit, viewIds2ModelIdList(viewIds));
|
||||
}
|
||||
|
||||
public List<HanlpMapResult> suffixSearchByModel(String key, int limit, Set<Long> models) {
|
||||
return SearchService.suffixSearch(key, limit, models);
|
||||
}
|
||||
|
||||
private Set<Long> viewIds2ModelIdList(Set<Long> viewIds) {
|
||||
Set<Long> modelIds = new HashSet<>();
|
||||
MetaFilter filter = new MetaFilter();
|
||||
filter.setIds(new ArrayList<>(viewIds));
|
||||
List<ViewResp> viewList = viewService.getViewList(filter);
|
||||
if (CollectionUtils.isEmpty(viewList)) {
|
||||
return modelIds;
|
||||
}
|
||||
viewList.stream().forEach(view -> modelIds.addAll(view.getAllModels()));
|
||||
return modelIds;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,356 @@
|
||||
package com.tencent.supersonic.headless.server.utils;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.common.pojo.Aggregator;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.DateConf;
|
||||
import com.tencent.supersonic.common.pojo.Filter;
|
||||
import com.tencent.supersonic.common.pojo.Order;
|
||||
import com.tencent.supersonic.common.pojo.enums.AggOperatorEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.headless.api.pojo.Dim;
|
||||
import com.tencent.supersonic.headless.api.pojo.ItemValueConfig;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.QuerySqlReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.request.QueryStructReq;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.DimensionResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.MetricResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.ModelResp;
|
||||
import com.tencent.supersonic.headless.api.pojo.response.SemanticQueryResp;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictConfDO;
|
||||
import com.tencent.supersonic.headless.server.persistence.dataobject.DictTaskDO;
|
||||
import com.tencent.supersonic.headless.server.service.DimensionService;
|
||||
import com.tencent.supersonic.headless.server.service.MetricService;
|
||||
import com.tencent.supersonic.headless.server.service.ModelService;
|
||||
import com.tencent.supersonic.headless.server.service.QueryService;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
import static com.tencent.supersonic.common.pojo.Constants.AND_UPPER;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.APOSTROPHE;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.COMMA;
|
||||
import static com.tencent.supersonic.common.pojo.Constants.SPACE;
|
||||
|
||||
@Component
|
||||
public class DictUtils {
|
||||
|
||||
private static String dateTimeFormatter = "yyyyMMddHHmmss";
|
||||
@Value("${dimension.multi.value.split:#}")
|
||||
private String dimMultiValueSplit;
|
||||
|
||||
@Value("${item.value.max.count:100000}")
|
||||
private Long itemValueMaxCount;
|
||||
|
||||
@Value("${item.value.white.frequency:999999}")
|
||||
private Long itemValueWhiteFrequency;
|
||||
|
||||
@Value("${item.value.date.start:1}")
|
||||
private Integer itemValueDateStart;
|
||||
@Value("${item.value.date.end:1}")
|
||||
private Integer itemValueDateEnd;
|
||||
|
||||
|
||||
private final DimensionService dimensionService;
|
||||
private final MetricService metricService;
|
||||
private final QueryService queryService;
|
||||
private final ModelService modelService;
|
||||
|
||||
public DictUtils(DimensionService dimensionService,
|
||||
MetricService metricService,
|
||||
QueryService queryService,
|
||||
ModelService modelService) {
|
||||
this.dimensionService = dimensionService;
|
||||
this.metricService = metricService;
|
||||
this.queryService = queryService;
|
||||
this.modelService = modelService;
|
||||
}
|
||||
|
||||
public String fetchDictFileName(DictItemResp dictItemResp) {
|
||||
return String.format("dic_value_%d_%s_%s", dictItemResp.getModelId(), dictItemResp.getType().name(),
|
||||
dictItemResp.getItemId());
|
||||
}
|
||||
|
||||
public DictTaskDO generateDictTaskDO(DictItemResp dictItemResp, User user) {
|
||||
DictTaskDO taskDO = new DictTaskDO();
|
||||
Date createAt = new Date();
|
||||
String name = dictItemResp.fetchDictFileName();
|
||||
taskDO.setName(name);
|
||||
taskDO.setType(dictItemResp.getType().name());
|
||||
taskDO.setItemId(dictItemResp.getItemId());
|
||||
taskDO.setConfig(JsonUtil.toString(dictItemResp.getConfig()));
|
||||
taskDO.setStatus(TaskStatusEnum.PENDING.getStatus());
|
||||
taskDO.setCreatedAt(createAt);
|
||||
String creator = (Objects.isNull(user) || Strings.isNullOrEmpty(user.getName())) ? "" : user.getName();
|
||||
taskDO.setCreatedBy(creator);
|
||||
return taskDO;
|
||||
}
|
||||
|
||||
public DictConfDO generateDictConfDO(DictItemReq itemValueReq, User user) {
|
||||
DictConfDO confDO = new DictConfDO();
|
||||
BeanUtils.copyProperties(itemValueReq, confDO);
|
||||
confDO.setType(itemValueReq.getType().name());
|
||||
confDO.setConfig(JsonUtil.toString(itemValueReq.getConfig()));
|
||||
Date createAt = new Date();
|
||||
confDO.setCreatedAt(createAt);
|
||||
String creator = Strings.isNullOrEmpty(user.getName()) ? "" : user.getName();
|
||||
confDO.setCreatedBy(creator);
|
||||
confDO.setStatus(itemValueReq.getStatus().name());
|
||||
return confDO;
|
||||
}
|
||||
|
||||
public List<DictItemResp> dictDOList2Req(List<DictConfDO> dictConfDOList) {
|
||||
List<DictItemResp> dictItemReqList = new ArrayList<>();
|
||||
dictConfDOList.stream().forEach(conf -> dictItemReqList.add(dictDO2Req(conf)));
|
||||
return dictItemReqList;
|
||||
}
|
||||
|
||||
public DictItemResp dictDO2Req(DictConfDO dictConfDO) {
|
||||
DictItemResp dictItemResp = new DictItemResp();
|
||||
BeanUtils.copyProperties(dictConfDO, dictItemResp);
|
||||
dictItemResp.setType(TypeEnums.valueOf(dictConfDO.getType()));
|
||||
dictItemResp.setConfig(JsonUtil.toObject(dictConfDO.getConfig(), ItemValueConfig.class));
|
||||
dictItemResp.setStatus(StatusEnum.of(dictConfDO.getStatus()));
|
||||
if (TypeEnums.DIMENSION.equals(TypeEnums.valueOf(dictConfDO.getType()))) {
|
||||
DimensionResp dimension = dimensionService.getDimension(dictConfDO.getItemId());
|
||||
dictItemResp.setModelId(dimension.getModelId());
|
||||
dictItemResp.setBizName(dimension.getBizName());
|
||||
}
|
||||
|
||||
return dictItemResp;
|
||||
}
|
||||
|
||||
public List<String> fetchItemValue(DictItemResp dictItemResp) {
|
||||
List<String> lines = new ArrayList<>();
|
||||
QuerySqlReq querySqlReq = constructQueryReq(dictItemResp);
|
||||
querySqlReq.setNeedAuth(false);
|
||||
String bizName = dictItemResp.getBizName();
|
||||
try {
|
||||
SemanticQueryResp semanticQueryResp = queryService.queryByReq(querySqlReq, null);
|
||||
if (Objects.isNull(semanticQueryResp) || CollectionUtils.isEmpty(semanticQueryResp.getResultList())) {
|
||||
return lines;
|
||||
}
|
||||
Map<String, Long> valueAndFrequencyPair = new HashMap<>(2000);
|
||||
for (Map<String, Object> line : semanticQueryResp.getResultList()) {
|
||||
|
||||
if (CollectionUtils.isEmpty(line) || !line.containsKey(bizName)
|
||||
|| line.get(bizName) == null || line.size() != 2) {
|
||||
continue;
|
||||
}
|
||||
String dimValue = line.get(bizName).toString();
|
||||
Object metricObject = null;
|
||||
for (String key : line.keySet()) {
|
||||
if (!bizName.equalsIgnoreCase(key)) {
|
||||
metricObject = line.get(key);
|
||||
}
|
||||
}
|
||||
if (!Strings.isNullOrEmpty(dimValue) && Objects.nonNull(metricObject)) {
|
||||
Long metric = Math.round(Double.parseDouble(metricObject.toString()));
|
||||
mergeMultivaluedValue(valueAndFrequencyPair, dimValue, metric);
|
||||
}
|
||||
}
|
||||
String nature = dictItemResp.getNature();
|
||||
constructDictLines(valueAndFrequencyPair, lines, nature);
|
||||
addWhiteValueLines(dictItemResp, lines, nature);
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
private void addWhiteValueLines(DictItemResp dictItemResp, List<String> lines, String nature) {
|
||||
if (Objects.isNull(dictItemResp) || Objects.isNull(dictItemResp.getConfig())
|
||||
|| CollectionUtils.isEmpty(dictItemResp.getConfig().getWhiteList())) {
|
||||
return;
|
||||
}
|
||||
List<String> whiteList = dictItemResp.getConfig().getWhiteList();
|
||||
whiteList.forEach(white -> lines.add(String.format("%s %s %s", white, nature, itemValueWhiteFrequency)));
|
||||
}
|
||||
|
||||
private void constructDictLines(Map<String, Long> valueAndFrequencyPair, List<String> lines, String nature) {
|
||||
if (CollectionUtils.isEmpty(valueAndFrequencyPair)) {
|
||||
return;
|
||||
}
|
||||
|
||||
valueAndFrequencyPair.forEach((value, frequency) -> {
|
||||
lines.add(String.format("%s %s %s", value, nature, frequency));
|
||||
});
|
||||
}
|
||||
|
||||
private void mergeMultivaluedValue(Map<String, Long> valueAndFrequencyPair, String dimValue, Long metric) {
|
||||
if (org.apache.logging.log4j.util.Strings.isEmpty(dimValue)) {
|
||||
return;
|
||||
}
|
||||
Map<String, Long> tmp = new HashMap<>();
|
||||
if (dimValue.contains(dimMultiValueSplit)) {
|
||||
Arrays.stream(dimValue.split(dimMultiValueSplit))
|
||||
.forEach(dimValueSingle -> tmp.put(dimValueSingle, metric));
|
||||
} else {
|
||||
tmp.put(dimValue, metric);
|
||||
}
|
||||
|
||||
for (String value : tmp.keySet()) {
|
||||
long metricOld = valueAndFrequencyPair.containsKey(value) ? valueAndFrequencyPair.get(value) : 0L;
|
||||
valueAndFrequencyPair.put(value, metric + metricOld);
|
||||
}
|
||||
}
|
||||
|
||||
private QuerySqlReq constructQueryReq(DictItemResp dictItemResp) {
|
||||
if (Objects.nonNull(dictItemResp) && Objects.nonNull(dictItemResp.getConfig())
|
||||
&& Objects.nonNull(dictItemResp.getConfig().getMetricId())) {
|
||||
// 查询默认指标
|
||||
QueryStructReq queryStructReq = generateQueryStruct(dictItemResp);
|
||||
return queryStructReq.convert(queryStructReq, true);
|
||||
}
|
||||
// count(1) 作为指标
|
||||
return constructQuerySqlReq(dictItemResp);
|
||||
}
|
||||
|
||||
private QuerySqlReq constructQuerySqlReq(DictItemResp dictItemResp) {
|
||||
String sqlPattern = "select %s,count(1) from tbl %s group by %s order by count(1) desc limit %d";
|
||||
String bizName = dictItemResp.getBizName();
|
||||
String whereStr = generateWhereStr(dictItemResp);
|
||||
String where = Strings.isNullOrEmpty(whereStr) ? "" : "WHERE" + whereStr;
|
||||
ItemValueConfig config = dictItemResp.getConfig();
|
||||
Long limit = (Objects.isNull(config) || Objects.isNull(config.getLimit())) ? itemValueMaxCount :
|
||||
dictItemResp.getConfig().getLimit();
|
||||
String sql = String.format(sqlPattern, bizName, where, bizName, limit);
|
||||
Set<Long> modelIds = new HashSet<>();
|
||||
modelIds.add(dictItemResp.getModelId());
|
||||
QuerySqlReq querySqlReq = new QuerySqlReq();
|
||||
querySqlReq.setSql(sql);
|
||||
querySqlReq.setNeedAuth(false);
|
||||
querySqlReq.setModelIds(modelIds);
|
||||
|
||||
return querySqlReq;
|
||||
}
|
||||
|
||||
private QueryStructReq generateQueryStruct(DictItemResp dictItemResp) {
|
||||
QueryStructReq queryStructReq = new QueryStructReq();
|
||||
|
||||
Set<Long> modelIds = new HashSet<>(Arrays.asList(dictItemResp.getModelId()));
|
||||
queryStructReq.setModelIds(modelIds);
|
||||
|
||||
List<String> groups = new ArrayList<>(Arrays.asList(dictItemResp.getBizName()));
|
||||
queryStructReq.setGroups(groups);
|
||||
|
||||
List<Filter> filters = generateFilters(dictItemResp);
|
||||
queryStructReq.setDimensionFilters(filters);
|
||||
|
||||
List<Aggregator> aggregators = new ArrayList<>();
|
||||
Long metricId = dictItemResp.getConfig().getMetricId();
|
||||
MetricResp metric = metricService.getMetric(metricId);
|
||||
String metricBizName = metric.getBizName();
|
||||
aggregators.add(new Aggregator(metricBizName, AggOperatorEnum.SUM));
|
||||
queryStructReq.setAggregators(aggregators);
|
||||
|
||||
List<Order> orders = new ArrayList<>();
|
||||
orders.add(new Order(metricBizName, Constants.DESC_UPPER));
|
||||
queryStructReq.setOrders(orders);
|
||||
|
||||
fillStructDateInfo(queryStructReq, dictItemResp);
|
||||
|
||||
Long limit = Objects.isNull(dictItemResp.getConfig().getLimit()) ? itemValueMaxCount :
|
||||
dictItemResp.getConfig().getLimit();
|
||||
queryStructReq.setLimit(limit);
|
||||
queryStructReq.setNeedAuth(false);
|
||||
return queryStructReq;
|
||||
}
|
||||
|
||||
private void fillStructDateInfo(QueryStructReq queryStructReq, DictItemResp dictItemResp) {
|
||||
ModelResp model = modelService.getModel(dictItemResp.getModelId());
|
||||
if (Objects.nonNull(model)) {
|
||||
List<Dim> timeDims = model.getTimeDimension();
|
||||
if (!CollectionUtils.isEmpty(timeDims)) {
|
||||
DateConf dateConf = new DateConf();
|
||||
dateConf.setDateMode(DateConf.DateMode.BETWEEN);
|
||||
String format = timeDims.get(0).getDateFormat();
|
||||
String start = LocalDate.now().minusDays(itemValueDateStart)
|
||||
.format(DateTimeFormatter.ofPattern(format));
|
||||
String end = LocalDate.now().minusDays(itemValueDateEnd)
|
||||
.format(DateTimeFormatter.ofPattern(format));
|
||||
dateConf.setStartDate(start);
|
||||
dateConf.setEndDate(end);
|
||||
queryStructReq.setDateInfo(dateConf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<Filter> generateFilters(DictItemResp dictItemResp) {
|
||||
List<Filter> filters = new ArrayList<>();
|
||||
if (Objects.isNull(dictItemResp)) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
String whereStr = generateWhereStr(dictItemResp);
|
||||
if (Strings.isNullOrEmpty(whereStr)) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
Filter filter = new Filter("", FilterOperatorEnum.SQL_PART, whereStr);
|
||||
filters.add(filter);
|
||||
return filters;
|
||||
}
|
||||
|
||||
public String generateWhereStr(DictItemResp dictItemResp) {
|
||||
StringJoiner joiner = new StringJoiner(SPACE + AND_UPPER + SPACE);
|
||||
|
||||
String bizName = dictItemResp.getBizName();
|
||||
ItemValueConfig config = dictItemResp.getConfig();
|
||||
if (Objects.nonNull(config)) {
|
||||
if (!CollectionUtils.isEmpty(config.getBlackList())) {
|
||||
StringJoiner joinerBlack = new StringJoiner(COMMA);
|
||||
config.getBlackList().stream().forEach(black -> joinerBlack.add(APOSTROPHE + black + APOSTROPHE));
|
||||
joiner.add(String.format("(%s not in (%s))", bizName, joinerBlack.toString()));
|
||||
}
|
||||
|
||||
if (!CollectionUtils.isEmpty(config.getRuleList())) {
|
||||
config.getRuleList().stream().forEach(rule -> joiner.add("(" + rule + ")"));
|
||||
}
|
||||
}
|
||||
|
||||
ModelResp model = modelService.getModel(dictItemResp.getModelId());
|
||||
if (Objects.nonNull(model)) {
|
||||
List<Dim> timeDims = model.getTimeDimension();
|
||||
if (!CollectionUtils.isEmpty(timeDims)) {
|
||||
String format = timeDims.get(0).getDateFormat();
|
||||
String start = LocalDate.now().minusDays(itemValueDateStart)
|
||||
.format(DateTimeFormatter.ofPattern(format));
|
||||
String end = LocalDate.now().minusDays(itemValueDateEnd)
|
||||
.format(DateTimeFormatter.ofPattern(format));
|
||||
joiner.add(String.format("( %s >= '%s' and %s <= '%s' )", TimeDimensionEnum.DAY.getName(), start,
|
||||
TimeDimensionEnum.DAY.getName(), end));
|
||||
}
|
||||
}
|
||||
return joiner.toString();
|
||||
}
|
||||
|
||||
public DictTaskResp taskDO2Resp(DictTaskDO dictTaskDO) {
|
||||
DictTaskResp resp = new DictTaskResp();
|
||||
BeanUtils.copyProperties(dictTaskDO, resp);
|
||||
resp.setTaskStatus(dictTaskDO.getStatus());
|
||||
resp.setType(TypeEnums.valueOf(dictTaskDO.getType()));
|
||||
resp.setConfig(JsonUtil.toObject(dictTaskDO.getConfig(), ItemValueConfig.class));
|
||||
return resp;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user