(improvement)(headless) 智能填充json提取优化 (#1608)

This commit is contained in:
yudong
2024-08-28 15:20:12 +08:00
committed by GitHub
parent b9ae0a4c92
commit 0fc4713c4f
4 changed files with 185 additions and 3 deletions

View File

@@ -125,6 +125,17 @@
<artifactId>arrow-jdbc</artifactId>
<version>${arrow-jdbc.version}</version>
</dependency>
<!--
  Log4j 2 binding for SLF4J, available on the test classpath only (scope "test").
  The wildcard exclusion drops every transitive dependency of this artifact.
  NOTE(review): no <version> is declared here; presumably it is managed by a parent POM
  or a dependencyManagement section - confirm.
-->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

View File

@@ -319,7 +319,7 @@ public class DimensionServiceImpl extends ServiceImpl<DimensionDOMapper, Dimensi
/**
 * Asks the alias generator for alias candidates of a dimension and returns them as a list.
 *
 * <p>The raw reply is passed through
 * {@code aliasGenerateHelper.extractJsonStringFromAiMessage} so that surrounding prose or
 * Markdown fences do not break parsing. The previous approach of stripping back-ticks, the
 * substring "json", newlines and spaces also removed those characters from inside the alias
 * values themselves.
 *
 * @param dimensionReq dimension whose name, bizName and description are sent to the generator
 * @param mockType     generation type forwarded unchanged to the generator
 * @param user         not used by this method
 * @return the aliases parsed from the reply as a JSON array of strings
 */
public List<String> mockAlias(DimensionReq dimensionReq, String mockType, User user) {
    String mockAlias = aliasGenerateHelper.generateAlias(mockType, dimensionReq.getName(),
            dimensionReq.getBizName(), "", dimensionReq.getDescription(), false);
    // Single declaration of ret: isolate the JSON payload before handing it to the parser.
    String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(mockAlias);
    return JSONObject.parseObject(ret, new TypeReference<List<String>>() {
    });
}
@@ -346,9 +346,8 @@ public class DimensionServiceImpl extends ServiceImpl<DimensionDOMapper, Dimensi
}
String json = aliasGenerateHelper.generateDimensionValueAlias(JSON.toJSONString(valueList));
log.info("return llm res is :{}", json);
String ret = json.replaceAll("`", "").replace("json", "").replace("\n", "").replace(" ", "");
String ret = aliasGenerateHelper.extractJsonStringFromAiMessage(json);
JSONObject jsonObject = JSON.parseObject(ret);
List<DimValueMap> dimValueMapsResp = new ArrayList<>();
int i = 0;
for (Map<String, Object> stringObjectMap : resultList) {

View File

@@ -1,6 +1,8 @@
package com.tencent.supersonic.headless.server.utils;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONException;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.model.chat.ChatLanguageModel;
@@ -96,4 +98,87 @@ public class AliasGenerateHelper {
log.info("msg:{}", msg);
return getChatCompletion(msg);
}
/**
 * Returns the text found between the first occurrence of {@code left} and the next
 * occurrence of {@code right} that follows it.
 *
 * <p>Identical and distinct markers are handled by the same search: the closing marker is
 * always looked up starting right after the opening marker.
 *
 * @param targetString  text to search
 * @param left          opening marker
 * @param right         closing marker
 * @param exclusionFlag {@code true} to return only the enclosed text, {@code false} to
 *                      return it wrapped in {@code left} and {@code right} again
 * @return {@code targetString} unchanged when any argument is {@code null}; {@code null}
 *         when either marker cannot be found; otherwise the extracted text
 */
private static String extractString(String targetString, String left, String right, Boolean exclusionFlag) {
    boolean argumentMissing =
            targetString == null || left == null || right == null || exclusionFlag == null;
    if (argumentMissing) {
        // A missing argument means "no extraction requested": hand the input back as is.
        return targetString;
    }

    int openingAt = targetString.indexOf(left);
    if (openingAt < 0) {
        return null;
    }

    // The enclosed text begins immediately after the opening marker.
    int contentStart = openingAt + left.length();
    int contentEnd = targetString.indexOf(right, contentStart);
    if (contentEnd < 0) {
        return null;
    }

    String content = targetString.substring(contentStart, contentEnd);
    if (exclusionFlag) {
        return content;
    }
    return left + content + right;
}
/**
 * Extracts the JSON payload embedded in a raw LLM reply.
 *
 * <p>Candidates are produced in a fixed order and the first one that parses as a JSON object
 * or a JSON array is returned:
 * <ol>
 *   <li>the whole message, untouched;</li>
 *   <li>the content of a plain, a "json" and a "JSON" Markdown code fence;</li>
 *   <li>the span from the first opening brace to the next closing brace, then to the last
 *       closing brace of the message;</li>
 *   <li>the same two attempts for square brackets.</li>
 * </ol>
 * The "last closing marker" attempts make nested objects and arrays that are surrounded by
 * prose extractable; the "next closing marker" attempts alone would cut them off at the first
 * inner closing marker.
 *
 * @param aiMessage raw reply text; may be {@code null}
 * @return the extracted JSON text exactly as it appears in the message (it is not trimmed)
 * @throws JSONException if no candidate can be parsed as a JSON object or array
 */
public static String extractJsonStringFromAiMessage(String aiMessage) {
    // One extraction attempt: a pair of markers plus how the markers are applied.
    class BoundaryPattern {
        final String left;
        final String right;
        // TRUE: return only the enclosed text; FALSE: keep the markers; null: no extraction.
        final Boolean exclusionFlag;
        // true: span from the first left marker to the LAST right marker, markers included.
        final boolean outermost;

        BoundaryPattern(String left, String right, Boolean exclusionFlag, boolean outermost) {
            this.left = left;
            this.right = right;
            this.exclusionFlag = exclusionFlag;
            this.outermost = outermost;
        }
    }

    BoundaryPattern[] patterns = {
        // no matching at all: try the message as it is
        new BoundaryPattern(null, null, null, false),
        // ```{"name":"Alice","age":25,"city":"NewYork"}```
        new BoundaryPattern("```", "```", true, false),
        // ```json {"name":"Alice","age":25,"city":"NewYork"}```
        new BoundaryPattern("```json", "```", true, false),
        // ```JSON {"name":"Alice","age":25,"city":"NewYork"}```
        new BoundaryPattern("```JSON", "```", true, false),
        // {"name":"Alice","age":25,"city":"NewYork"}
        new BoundaryPattern("{", "}", false, false),
        // {"tran":["a"],"alias":{"a":["b"]}} - nested content needs the last closing brace
        new BoundaryPattern("{", "}", false, true),
        // ["Alice", "Bob"]
        new BoundaryPattern("[", "]", false, false),
        // [["a","b"],["c"]] - nested content needs the last closing bracket
        new BoundaryPattern("[", "]", false, true)
    };

    for (BoundaryPattern pattern : patterns) {
        String extracted;
        if (pattern.outermost) {
            extracted = null;
            if (aiMessage != null) {
                int start = aiMessage.indexOf(pattern.left);
                int end = aiMessage.lastIndexOf(pattern.right);
                if (start != -1 && end > start) {
                    extracted = aiMessage.substring(start, end + pattern.right.length());
                }
            }
        } else {
            extracted = extractString(aiMessage, pattern.left, pattern.right, pattern.exclusionFlag);
        }
        // Blank candidates must be rejected explicitly: fastjson 1.x yields null for blank
        // input instead of throwing, so they would otherwise be reported as valid JSON.
        if (extracted == null || extracted.trim().isEmpty()) {
            continue;
        }
        // Accept the candidate if it parses as an object or as an array.
        try {
            JSON.parseObject(extracted);
            return extracted;
        } catch (JSONException ignored) {
            // not a JSON object; try an array next
        }
        try {
            JSON.parseArray(extracted);
            return extracted;
        } catch (JSONException ignored) {
            // not a JSON array either; move on to the next pattern
        }
    }
    throw new JSONException("json extract failed");
}
}

View File

@@ -0,0 +1,87 @@
package com.tencent.supersonic.headless.server.utils;
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link AliasGenerateHelper#extractJsonStringFromAiMessage(String)}.
 *
 * <p>Each test checks the extracted text itself, not merely that no exception is thrown.
 */
class AliasGenerateHelperTest {

    /** A message that is already a bare JSON object is returned unchanged. */
    @Test
    void extractJsonStringFromAiMessage1() {
        String testJson1 = "{\"name\": \"Alice\", \"age\": 25, \"city\": \"New York\"}";
        assertEquals(testJson1, AliasGenerateHelper.extractJsonStringFromAiMessage(testJson1));
    }

    /** A JSON object wrapped in a plain Markdown code fence is returned without the fence. */
    @Test
    void extractJsonStringFromAiMessage2() {
        String json = "{\n"
                + " \"name\": \"Alice\",\n"
                + " \"age\": 25,\n"
                + " \"city\": \"New York\"\n"
                + "}";
        String testJson2 = "```\n"
                + json + "\n"
                + "```";
        // The line breaks next to the fence belong to the extracted text, hence the trim.
        assertEquals(json, AliasGenerateHelper.extractJsonStringFromAiMessage(testJson2).trim());
    }

    /** A "json" code fence surrounded by prose (which itself contains back-ticks) is handled. */
    @Test
    void extractJsonStringFromAiMessage3() {
        String json = "{\n"
                + " \"name\": \"Alice\",\n"
                + " \"age\": 25,\n"
                + " \"city\": \"New York\"\n"
                + "}";
        String testJson3 = "I understand that you want me to generate a JSON object with two properties: "
                + "`tran` and `alias`...."
                + "```json\n"
                + json + "\n"
                + "```"
                + "Please let me know if there is any problem.";
        // The line breaks next to the fence belong to the extracted text, hence the trim.
        assertEquals(json, AliasGenerateHelper.extractJsonStringFromAiMessage(testJson3).trim());
    }

    /** A JSON array embedded in prose without any fence is returned including its brackets. */
    @Test
    void extractJsonStringFromAiMessage4() {
        String json = "[\n"
                + " \"作者名称\",\n"
                + " \"作者姓名\",\n"
                + " \"创作者\",\n"
                + " \"作者信息\"\n"
                + "]";
        String testJson4 = "Based on the provided JSON-schema, I will construct the answer as follows:\n"
                + "\n"
                + json + "\n"
                + "\n"
                + "This answer conforms to the format described in the JSON-schema";
        assertEquals(json, AliasGenerateHelper.extractJsonStringFromAiMessage(testJson4));
    }
}