(improvement)(dict) support queryDictValue (#1196)

This commit is contained in:
daikon
2024-06-23 11:56:26 +08:00
committed by GitHub
parent a658b9c45f
commit 2ae94fb38c
7 changed files with 177 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
package com.tencent.supersonic.headless.api.pojo.request;
import com.tencent.supersonic.common.pojo.PageBaseReq;
import com.tencent.supersonic.common.pojo.enums.TypeEnums;
import lombok.Data;
import lombok.ToString;
/**
 * Paged request that identifies the dictionary whose values should be listed.
 *
 * <p>Paging fields are inherited from {@link PageBaseReq}. The model id, item id and type
 * together select the dictionary.
 */
@Data
@ToString
public class DictValueReq extends PageBaseReq {
// Id of the model the dictionary item belongs to.
private Long modelId;
// Id of the dictionary item (presumably a dimension or metric id, depending on type -- confirm).
private Long itemId;
// Kind of item the dictionary was built for; defaults to DIMENSION when the caller omits it.
private TypeEnums type = TypeEnums.DIMENSION;
}

View File

@@ -0,0 +1,18 @@
package com.tencent.supersonic.headless.api.pojo.response;
import lombok.Data;
import lombok.ToString;
/**
 * One entry of a dictionary file, as returned by the dictionary value query.
 *
 * @author: kanedai
 * @date: 2024/6/22
 */
@ToString
@Data
public class DictValueResp {
// The dictionary word/value itself.
private String value;
// Tag associated with the value (presumably the word nature used by the tokenizer -- confirm).
private String nature;
// Frequency recorded for the value in the dictionary file.
private Long frequency;
}

View File

@@ -1,5 +1,9 @@
package com.tencent.supersonic.headless.chat.knowledge.file;
import com.github.pagehelper.PageInfo;
import com.tencent.supersonic.headless.api.pojo.request.DictValueReq;
import com.tencent.supersonic.headless.api.pojo.response.DictValueResp;
import java.util.List;
public interface FileHandler {
@@ -54,4 +58,7 @@ public interface FileHandler {
*/
void deleteFile(String fileName);
/**
 * Returns one page of entries read from the given dictionary file.
 *
 * @param fileName name of the dictionary file
 * @param dictValueReq request carrying the page number and page size to read
 * @return page of dictionary entries together with paging metadata
 */
PageInfo<DictValueResp> queryDictValue(String fileName, DictValueReq dictValueReq);
/**
 * Resolves the path of the given dictionary file.
 *
 * @param fileName name of the dictionary file
 * @return path of the dictionary file; NOTE(review): FileHandlerImpl returns {@code null}
 *         when the file does not exist -- confirm other implementations follow suit
 */
String queryDictFilePath(String fileName);
}

View File

@@ -1,6 +1,10 @@
package com.tencent.supersonic.headless.chat.knowledge.file;
import com.github.pagehelper.PageInfo;
import com.tencent.supersonic.headless.api.pojo.request.DictValueReq;
import com.tencent.supersonic.headless.api.pojo.response.DictValueResp;
import lombok.extern.slf4j.Slf4j;
import org.apache.logging.log4j.util.Strings;
import org.springframework.stereotype.Component;
import org.springframework.util.CollectionUtils;
@@ -14,7 +18,11 @@ import java.nio.file.StandardOpenOption;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@Slf4j
@Component
@@ -22,6 +30,7 @@ public class FileHandlerImpl implements FileHandler {
// Separator placed between a directory and a file name when building paths.
// NOTE(review): "SPILT" looks like a misspelling of "SPLIT"; the constant is public, so it is left as is.
public static final String FILE_SPILT = File.separator;
// Source of the dictionary directory locations used by this handler.
private final LocalFileConfig localFileConfig;
/**
 * Creates the handler.
 *
 * @param localFileConfig configuration that supplies the dictionary directories
 */
public FileHandlerImpl(LocalFileConfig localFileConfig) {
this.localFileConfig = localFileConfig;
}
@@ -68,6 +77,76 @@ public class FileHandlerImpl implements FileHandler {
}
}
/**
 * Reads one page of entries from a dictionary file in the latest dictionary directory.
 *
 * <p>Lines are numbered from 1; page {@code current} covers lines
 * {@code (current - 1) * pageSize + 1} through {@code current * pageSize}, capped at the
 * number of lines in the file. A page that lies past the end of the file (or a non-positive
 * page size) yields an empty list instead of failing. A page number below 1 is treated as 1.
 *
 * @param fileName name of the dictionary file inside the latest dictionary directory
 * @param dictValueReq request carrying the page number ({@code current}) and page size
 * @return the requested page; {@code total} is the number of lines in the file
 */
@Override
public PageInfo<DictValueResp> queryDictValue(String fileName, DictValueReq dictValueReq) {
    String filePath = localFileConfig.getDictDirectoryLatest() + FILE_SPILT + fileName;
    long fileLineNum = getFileLineNum(filePath);

    // A page number below 1 would produce a negative skip count further down.
    int current = Math.max(dictValueReq.getCurrent(), 1);
    int pageSize = dictValueReq.getPageSize();

    // Use long arithmetic so that current * pageSize cannot overflow int.
    long startLine = (current - 1L) * pageSize + 1;
    long endLine = Math.min((long) current * pageSize, fileLineNum);

    List<DictValueResp> dictValueRespList = new ArrayList<>();
    if (startLine <= endLine) {
        // Only read when the window is non-empty; an empty window means the page is past
        // the end of the file or the page size is not positive.
        dictValueRespList = getFileData(filePath,
                Math.toIntExact(startLine), Math.toIntExact(endLine));
    }

    PageInfo<DictValueResp> dictValueRespPageInfo = new PageInfo<>();
    dictValueRespPageInfo.setPageSize(pageSize);
    dictValueRespPageInfo.setPageNum(current);
    dictValueRespPageInfo.setTotal(fileLineNum);
    dictValueRespPageInfo.setList(dictValueRespList);
    dictValueRespPageInfo.setHasNextPage(endLine < fileLineNum);
    // The first page has no predecessor; startLine is always >= 1 so it cannot be used here.
    dictValueRespPageInfo.setHasPreviousPage(current > 1);
    return dictValueRespPageInfo;
}
/**
 * Resolves a dictionary file inside the latest dictionary directory.
 *
 * @param fileName name of the dictionary file
 * @return the file's path when it exists, otherwise {@code null}
 */
@Override
public String queryDictFilePath(String fileName) {
    final String candidate = localFileConfig.getDictDirectoryLatest() + FILE_SPILT + fileName;
    if (!existPath(candidate)) {
        log.info("dict file:{} is not exist", candidate);
        return null;
    }
    return candidate;
}
/**
 * Reads the lines in the inclusive, 1-based range {@code [startLine, endLine]} of a file and
 * converts each one to a {@link DictValueResp}.
 *
 * @param filePath path of the dictionary file
 * @param startLine first line to read (1-based)
 * @param endLine last line to read (inclusive)
 * @return the converted entries; empty when the range is empty or invalid, or when the file
 *         cannot be read
 */
private List<DictValueResp> getFileData(String filePath, Integer startLine, Integer endLine) {
    // Stream.skip/limit throw IllegalArgumentException for negative arguments, so an
    // out-of-range window (for example a page past the end of the file) returns nothing.
    if (startLine == null || endLine == null || startLine < 1 || endLine < startLine) {
        return new ArrayList<>();
    }
    try (Stream<String> lines = Files.lines(Paths.get(filePath))) {
        return lines
                .skip(startLine - 1L)
                .limit(endLine - startLine + 1L)
                .map(this::convert2Resp)
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
    } catch (IOException | java.io.UncheckedIOException e) {
        // Files.lines reads lazily, so read failures can surface as UncheckedIOException.
        // Passing the exception as the last argument makes the logger print its stack trace.
        log.warn("[getFileData] failed to read dict file:{}", filePath, e);
    }
    return new ArrayList<>();
}
/**
 * Parses one line of a dictionary file into a {@link DictValueResp}.
 *
 * <p>A line is expected to hold at least three whitespace-separated columns: the value, its
 * nature and its frequency. {@code #} characters in the value column are turned back into
 * spaces.
 *
 * @param lineStr raw line from the dictionary file
 * @return the parsed entry, or {@code null} when the line is blank, has fewer than three
 *         columns, or has a non-numeric frequency, so that callers can filter it out
 */
private DictValueResp convert2Resp(String lineStr) {
    if (lineStr == null) {
        return null;
    }
    // Trim first: leading whitespace would otherwise produce an empty first column.
    String trimmed = lineStr.trim();
    if (trimmed.isEmpty()) {
        return null;
    }
    String[] itemArray = trimmed.split("\\s+");
    if (itemArray.length < 3) {
        return null;
    }
    long frequency;
    try {
        frequency = Long.parseLong(itemArray[2]);
    } catch (NumberFormatException e) {
        // One bad line must not abort the whole page.
        log.warn("[convert2Resp] skip dict line with non-numeric frequency:{}", lineStr);
        return null;
    }
    DictValueResp dictValueResp = new DictValueResp();
    dictValueResp.setValue(itemArray[0].replace("#", " "));
    dictValueResp.setNature(itemArray[1]);
    dictValueResp.setFrequency(frequency);
    return dictValueResp;
}
/**
 * Counts the lines of a file.
 *
 * @param filePath path of the file to count
 * @return number of lines, or {@code 0} when the file cannot be read
 */
private Long getFileLineNum(String filePath) {
    try (Stream<String> lines = Files.lines(Paths.get(filePath))) {
        return lines.count();
    } catch (IOException | java.io.UncheckedIOException e) {
        // Report through the class logger instead of printing to stderr; the lazy read can
        // also fail with UncheckedIOException while counting.
        log.warn("[getFileLineNum] failed to read dict file:{}", filePath, e);
    }
    return 0L;
}
@Override
public Boolean existPath(String pathStr) {
Path path = Paths.get(pathStr);

View File

@@ -1,13 +1,16 @@
package com.tencent.supersonic.headless.server.rest;
import com.github.pagehelper.PageInfo;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.auth.api.authentication.utils.UserHolder;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictItemReq;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.request.DictValueReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.api.pojo.response.DictValueResp;
import com.tencent.supersonic.headless.server.schedule.EmbeddingTask;
import com.tencent.supersonic.headless.server.service.DictConfService;
import com.tencent.supersonic.headless.server.service.DictTaskService;
@@ -137,4 +140,31 @@ public class KnowledgeController {
embeddingTask.reloadMetaEmbedding();
return true;
}
/**
 * queryDictValue - returns the data of a dictionary.
 *
 * @param dictValueReq identifies the dictionary and the page to return
 * @param request current HTTP request, used to resolve the calling user
 * @param response current HTTP response, used to resolve the calling user
 * @return the requested page of dictionary values
 */
@PostMapping("/dict/data")
public PageInfo<DictValueResp> queryDictValue(@RequestBody @Valid DictValueReq dictValueReq,
        HttpServletRequest request,
        HttpServletResponse response) {
    return taskService.queryDictValue(dictValueReq, UserHolder.findUser(request, response));
}
/**
 * queryDictFilePath - returns the path of a dictionary file.
 *
 * @param dictValueReq identifies the dictionary whose file path is wanted
 * @param request current HTTP request, used to resolve the calling user
 * @param response current HTTP response, used to resolve the calling user
 * @return whatever path the dictionary task service reports for that dictionary
 */
@PostMapping("/dict/file")
public String queryDictFilePath(@RequestBody @Valid DictValueReq dictValueReq,
        HttpServletRequest request,
        HttpServletResponse response) {
    return taskService.queryDictFilePath(dictValueReq, UserHolder.findUser(request, response));
}
}

View File

@@ -1,8 +1,11 @@
package com.tencent.supersonic.headless.server.service;
import com.github.pagehelper.PageInfo;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.request.DictValueReq;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.api.pojo.response.DictValueResp;
/**
* Manage dictionary tasks
@@ -15,4 +18,8 @@ public interface DictTaskService {
Boolean dailyDictTask();
DictTaskResp queryLatestDictTask(DictSingleTaskReq taskReq, User user);
/**
 * Returns one page of the values stored in the dictionary identified by the request.
 *
 * @param dictValueReq model id, item id, type and paging information
 * @param user the calling user
 * @return page of dictionary values with paging metadata
 */
PageInfo<DictValueResp> queryDictValue(DictValueReq dictValueReq, User user);
/**
 * Returns the path of the dictionary file identified by the request.
 *
 * @param dictValueReq model id, item id and type of the dictionary
 * @param user the calling user
 * @return path of the dictionary file, as reported by the underlying file handler
 */
String queryDictFilePath(DictValueReq dictValueReq, User user);
}

View File

@@ -1,13 +1,16 @@
package com.tencent.supersonic.headless.server.service.impl;
import com.github.pagehelper.PageInfo;
import com.tencent.supersonic.auth.api.authentication.pojo.User;
import com.tencent.supersonic.common.pojo.Constants;
import com.tencent.supersonic.common.pojo.enums.StatusEnum;
import com.tencent.supersonic.common.pojo.enums.TaskStatusEnum;
import com.tencent.supersonic.headless.api.pojo.request.DictItemFilter;
import com.tencent.supersonic.headless.api.pojo.request.DictSingleTaskReq;
import com.tencent.supersonic.headless.api.pojo.request.DictValueReq;
import com.tencent.supersonic.headless.api.pojo.response.DictItemResp;
import com.tencent.supersonic.headless.api.pojo.response.DictTaskResp;
import com.tencent.supersonic.headless.api.pojo.response.DictValueResp;
import com.tencent.supersonic.headless.chat.knowledge.KnowledgeBaseService;
import com.tencent.supersonic.headless.chat.knowledge.file.FileHandler;
import com.tencent.supersonic.headless.chat.knowledge.helper.HanlpHelper;
@@ -148,4 +151,21 @@ public class DictTaskServiceImpl implements DictTaskService {
return dictRepository.queryLatestDictTask(taskReq);
}
/**
 * Looks up one page of values from the dictionary file that belongs to the requested
 * model, type and item.
 *
 * @param dictValueReq model id, item id, type and paging information
 * @param user the calling user (not consulted here)
 * @return the page produced by the file handler
 */
@Override
public PageInfo<DictValueResp> queryDictValue(DictValueReq dictValueReq, User user) {
    // File name layout: dic_value_<modelId>_<type>_<itemId>.<dictFileType>
    String baseName = String.format("dic_value_%d_%s_%s",
            dictValueReq.getModelId(), dictValueReq.getType().name(), dictValueReq.getItemId());
    return fileHandler.queryDictValue(baseName + Constants.DOT + dictFileType, dictValueReq);
}
/**
 * Resolves the path of the dictionary file that belongs to the requested model, type and
 * item.
 *
 * @param dictValueReq model id, item id and type of the dictionary
 * @param user the calling user (not consulted here)
 * @return the path reported by the file handler
 */
@Override
public String queryDictFilePath(DictValueReq dictValueReq, User user) {
    // File name layout: dic_value_<modelId>_<type>_<itemId>.<dictFileType>
    String baseName = String.format("dic_value_%d_%s_%s",
            dictValueReq.getModelId(), dictValueReq.getType().name(), dictValueReq.getItemId());
    return fileHandler.queryDictFilePath(baseName + Constants.DOT + dictFileType);
}
}