mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
Compare commits
280 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f60c1675cd | ||
|
|
1d9b6d6877 | ||
|
|
d8930e8906 | ||
|
|
c68df24375 | ||
|
|
bb1001677d | ||
|
|
7a1cfbcef8 | ||
|
|
67b9c4bf79 | ||
|
|
7cb7697353 | ||
|
|
3e18655c69 | ||
|
|
e7d52f87f0 | ||
|
|
2cd8f8022b | ||
|
|
e08435902a | ||
|
|
b44fa2bf3c | ||
|
|
d7f1f06daf | ||
|
|
4c26e0c972 | ||
|
|
d7fafa361d | ||
|
|
0c69651ef3 | ||
|
|
b5fdbfbbf6 | ||
|
|
33a2688e77 | ||
|
|
6bd97cd8af | ||
|
|
64615cbef9 | ||
|
|
dfb3b59984 | ||
|
|
61641ecb00 | ||
|
|
5016881ce3 | ||
|
|
fe75b3e393 | ||
|
|
3db443f9b1 | ||
|
|
59c21ea19a | ||
|
|
95334441b1 | ||
|
|
276b224c13 | ||
|
|
f03da53d6f | ||
|
|
9201550027 | ||
|
|
c86cd9f901 | ||
|
|
ef8caea9d2 | ||
|
|
6daaff8c30 | ||
|
|
4b00c16eb7 | ||
|
|
4dae84034e | ||
|
|
e6eac03ec6 | ||
|
|
e9a479e2df | ||
|
|
7db1cc270e | ||
|
|
3bf5b86535 | ||
|
|
3d30632b41 | ||
|
|
287a6561ff | ||
|
|
169262cc62 | ||
|
|
fda5a577d6 | ||
|
|
f89be48e98 | ||
|
|
2c7afd0d55 | ||
|
|
2ad0553f6c | ||
|
|
340cb2c835 | ||
|
|
caefa501f2 | ||
|
|
5c96d75d39 | ||
|
|
86c2f96942 | ||
|
|
73899e3174 | ||
|
|
49bb2c6d8b | ||
|
|
9223a4f856 | ||
|
|
f3f60af231 | ||
|
|
3cdfcae01c | ||
|
|
0c6efada43 | ||
|
|
d79f73eab6 | ||
|
|
3ae720ef30 | ||
|
|
221e88de0f | ||
|
|
23d926f195 | ||
|
|
97b11ec244 | ||
|
|
899047dbd1 | ||
|
|
cb4b91878f | ||
|
|
6af661459c | ||
|
|
0e0ba51750 | ||
|
|
a5c32ac064 | ||
|
|
abbe8c84a1 | ||
|
|
6c0f88d8b5 | ||
|
|
68ada561ac | ||
|
|
18b52ec742 | ||
|
|
ca8d7d89c1 | ||
|
|
e6ab7cb5ff | ||
|
|
9679169e6f | ||
|
|
ed0f856438 | ||
|
|
9aa5c93d9d | ||
|
|
b45592c009 | ||
|
|
6e0fa95e6f | ||
|
|
94f310d17f | ||
|
|
2bc29d64a4 | ||
|
|
c220ca69c2 | ||
|
|
4280aad0a7 | ||
|
|
c98d15059b | ||
|
|
a862a83272 | ||
|
|
c6d59701db | ||
|
|
39a85dc4ed | ||
|
|
507c02a8fd | ||
|
|
380597f0c3 | ||
|
|
e469c449b4 | ||
|
|
f8bdb8a4b4 | ||
|
|
d76216a2ec | ||
|
|
82cfb3050d | ||
|
|
57f7d0c67d | ||
|
|
c11a242f34 | ||
|
|
576fad5fb1 | ||
|
|
8171d754e0 | ||
|
|
6be0f02c75 | ||
|
|
95e3138ab2 | ||
|
|
3a30a1a317 | ||
|
|
46733d1728 | ||
|
|
b6734d99e1 | ||
|
|
9cb01149f8 | ||
|
|
db88127da9 | ||
|
|
0e492ef402 | ||
|
|
4d095f9676 | ||
|
|
7c7ccadcfd | ||
|
|
02b9dc6947 | ||
|
|
4222d7e2b5 | ||
|
|
b5daf04c96 | ||
|
|
d26089a249 | ||
|
|
627683d437 | ||
|
|
87e222eecc | ||
|
|
667272b103 | ||
|
|
3f08d95aaa | ||
|
|
27ebda3439 | ||
|
|
52eca178d3 | ||
|
|
f5a064aaad | ||
|
|
41e585324d | ||
|
|
c36082476f | ||
|
|
4bc1378285 | ||
|
|
23d01c4f83 | ||
|
|
49ebb70cb3 | ||
|
|
27bb1b322e | ||
|
|
0534053ff9 | ||
|
|
fe2a424718 | ||
|
|
d79e30cd7a | ||
|
|
aa433baa06 | ||
|
|
30bb9a1dc0 | ||
|
|
c168925f03 | ||
|
|
42c0bea8fc | ||
|
|
291c00749a | ||
|
|
6763ea0f7b | ||
|
|
f917defea8 | ||
|
|
91718592d4 | ||
|
|
6d9a8095eb | ||
|
|
5b8fdbc6fd | ||
|
|
e15e44e4a2 | ||
|
|
980d317152 | ||
|
|
2c23c2f574 | ||
|
|
80ad75503b | ||
|
|
0143b0a1b2 | ||
|
|
dd115f9d37 | ||
|
|
f198ce1ef8 | ||
|
|
18211a215d | ||
|
|
d6a386ad03 | ||
|
|
8f19584ad7 | ||
|
|
d9eaf79ab8 | ||
|
|
05b1a7ec3b | ||
|
|
11cdcb29fa | ||
|
|
46a9e5b097 | ||
|
|
8c65ac80b5 | ||
|
|
5b3a9ffba8 | ||
|
|
8688c8c2b3 | ||
|
|
13d8b9cff5 | ||
|
|
aa448b1ba3 | ||
|
|
7ef3d92f2c | ||
|
|
9f09598ccd | ||
|
|
36c8938ff7 | ||
|
|
3271db4ca6 | ||
|
|
400d8b34fd | ||
|
|
d4374f7074 | ||
|
|
438ee539d6 | ||
|
|
5ccde0206c | ||
|
|
74ed269544 | ||
|
|
1ad2c5402b | ||
|
|
805abeb261 | ||
|
|
551a376b00 | ||
|
|
47be92d5f6 | ||
|
|
5feac0c14e | ||
|
|
0f02e21eaa | ||
|
|
cdb84716b7 | ||
|
|
731238de08 | ||
|
|
cb1ad94086 | ||
|
|
24b0be7566 | ||
|
|
3241ef87a3 | ||
|
|
bd541e1199 | ||
|
|
f998f27c6f | ||
|
|
cf788316c3 | ||
|
|
8ed7e91221 | ||
|
|
e537b738e4 | ||
|
|
bf3a111e55 | ||
|
|
63a526709d | ||
|
|
e0088e8f8f | ||
|
|
7d33c49db8 | ||
|
|
acee0a36da | ||
|
|
a528ba6070 | ||
|
|
ba224ac335 | ||
|
|
18aa14118c | ||
|
|
4e139c837a | ||
|
|
6ad74bb206 | ||
|
|
16c3de44e4 | ||
|
|
608a4f7a2f | ||
|
|
cd972d0850 | ||
|
|
2aeeb1a14e | ||
|
|
41aa6ff8e4 | ||
|
|
67f658ced2 | ||
|
|
94fa86629d | ||
|
|
2cb0640a7b | ||
|
|
772d5bd3ae | ||
|
|
d6681ead60 | ||
|
|
d94fd4714f | ||
|
|
0365886270 | ||
|
|
aa6c658a9a | ||
|
|
6e3f871015 | ||
|
|
6c9983164e | ||
|
|
e00b935c1f | ||
|
|
f5f9c0314a | ||
|
|
910384d17f | ||
|
|
2fe56e7462 | ||
|
|
b8989e204f | ||
|
|
84b7c2c062 | ||
|
|
14373309aa | ||
|
|
9cd3e22721 | ||
|
|
2f812372d7 | ||
|
|
9f813ca1c0 | ||
|
|
70784598e1 | ||
|
|
ad20380283 | ||
|
|
f4e3922f47 | ||
|
|
bfac71a7d0 | ||
|
|
435e789fa4 | ||
|
|
b9f5e0a354 | ||
|
|
372e4acc2c | ||
|
|
8f37c3175f | ||
|
|
438e8463f5 | ||
|
|
ae9aa1ba0f | ||
|
|
688d26c457 | ||
|
|
8b99b46787 | ||
|
|
80cce47f58 | ||
|
|
c92184d89f | ||
|
|
a8fe575999 | ||
|
|
38099c8cc7 | ||
|
|
32e51257f6 | ||
|
|
e44e7ca8d5 | ||
|
|
d533496b2a | ||
|
|
eb9db28352 | ||
|
|
836ee5f3ed | ||
|
|
0fa31f84a3 | ||
|
|
9a3c71df4a | ||
|
|
e4e39e0496 | ||
|
|
cd901fbc68 | ||
|
|
8fde378534 | ||
|
|
d8f81aca65 | ||
|
|
b9895d541b | ||
|
|
4fbc3c8533 | ||
|
|
62e2bf7de6 | ||
|
|
c6f9ea2b20 | ||
|
|
166a3cfe09 | ||
|
|
cbf84876de | ||
|
|
8bd43f113b | ||
|
|
d710986923 | ||
|
|
dd63b78937 | ||
|
|
8d1a07585b | ||
|
|
f4638b48d5 | ||
|
|
a1d56fc7e4 | ||
|
|
9879c99873 | ||
|
|
1016efc646 | ||
|
|
156aa6822b | ||
|
|
34eb94320e | ||
|
|
ba1d14f40a | ||
|
|
dc4fbb1a14 | ||
|
|
7d770d2a6d | ||
|
|
7b861f563c | ||
|
|
65614ed3ba | ||
|
|
7acdf9cb3d | ||
|
|
2e4954a4e8 | ||
|
|
36052cb4f2 | ||
|
|
8d81f63e08 | ||
|
|
bf5be11549 | ||
|
|
36907ccac1 | ||
|
|
0f3e9e8754 | ||
|
|
883cdbefbe | ||
|
|
968d50e071 | ||
|
|
a9bb1c1f68 | ||
|
|
207d6cba43 | ||
|
|
40ba179703 | ||
|
|
5b8fde70ca | ||
|
|
e3232f0198 | ||
|
|
c5536aa25d | ||
|
|
37bb9ff767 | ||
|
|
f2e8207245 |
@@ -4,6 +4,15 @@
|
||||
- "Breaking Changes" describes any changes that may break existing functionality or cause
|
||||
compatibility issues with previous versions.
|
||||
|
||||
## SuperSonic [0.8.2] - 2023-12-18
|
||||
|
||||
### Added
|
||||
- rewrite Python service with Java project, default to Java implementation.
|
||||
- support setting the SQL generation method for large models in the interface.
|
||||
- optimization of metric market experience.
|
||||
- optimization of semantic modeling canvas experience.
|
||||
- code structure adjustment and abstraction optimization for chat.
|
||||
|
||||
## SuperSonic [0.7.5] - 2023-10-13
|
||||
|
||||
### Added
|
||||
|
||||
16
README.md
16
README.md
@@ -2,25 +2,25 @@
|
||||
|
||||
# SuperSonic (超音数)
|
||||
|
||||
**SuperSonic is an out-of-the-box yet highly extensible framework for building ChatBI**. SuperSonic provides a chat interface that empowers users to query data using natural language and visualize the results with suitable charts. To enable such experience, the only thing necessary is to build logical semantic models (definition of metrics/dimensions/entities, along with their meaning, context and relationships) on top of physical data models, and no data modification or copying is required. Meanwhile, SuperSonic is designed to be pluggable, allowing new functionalities to be added through plugins and core components to be integrated with other systems.
|
||||
**SuperSonic is the next-generation LLM-powered data analytics platform that integrates ChatBI and HeadlessBI**. SuperSonic provides a chat interface that empowers users to query data using natural language and visualize the results with suitable charts. To enable such experience, the only thing necessary is to build logical semantic models (definition of entities/metrics/dimensions/tags, along with their meaning, context and relationships) on top of physical data models, and **no data modification or copying** is required. Meanwhile, SuperSonic is designed to be **highly extensible**, allowing custom functionalities to be added and configured with Java SPI.
|
||||
|
||||
<img src="./docs/images/supersonic_demo.gif" height="100%" width="100%" align="center"/>
|
||||
|
||||
## Motivation
|
||||
|
||||
The emergence of Large Language Model (LLM) like ChatGPT is reshaping the way information is retrieved. In the field of data analytics, both academia and industry are primarily focused on leveraging LLM to convert natural language into SQL (so called text2sql or nl2sql). While some works exhibit promising results, their **reliability** is inadequate for real-world applications.
|
||||
The emergence of Large Language Model (LLM) like ChatGPT is reshaping the way information is retrieved. In the field of data analytics, both academia and industry are primarily focused on leveraging LLM to convert natural language into SQL (so called Text2SQL or NL2SQL). While some approaches exhibit promising results, their **reliability** and **efficiency** are insufficient for real-world applications.
|
||||
|
||||
From our perspective, the key to filling the real-world gap lies in three aspects:
|
||||
1. Introduce a semantic layer encapsulating underlying data context(joins, formulas, etc) to reduce **complexity**.
|
||||
2. Augment the LLM with schema mappers(as a kind of preprocessor) and semantic correctors(as a kind of postprocessor) to mitigate **hallucination**.
|
||||
3. Utilize heuristic rules when necessary to improve **efficiency**(in terms of latency and cost).
|
||||
1. Integrate ChatBI with HeadlessBI encapsulating underlying data context (joins, keys, formulas, etc) to **reduce complexity**.
|
||||
2. Augment the LLM with schema mappers(as a kind of preprocessor) and semantic correctors(as a kind of postprocessor) to **mitigate hallucination**.
|
||||
3. Utilize rule-based schema parsers when necessary to **improve efficiency**(in terms of latency and cost).
|
||||
|
||||
With these ideas in mind, we develop SuperSonic as a practical reference implementation and use it to power our real-world products. Additionally, to facilitate further development of ChatBI, we decide to open source SuperSonic as an extensible framework.
|
||||
|
||||
## Out-of-the-box Features
|
||||
|
||||
- Built-in CUI(Chat User Interface) for *business users* to enter data queries
|
||||
- Built-in GUI(Graphical User Interface) for *analytics engineers* to build semantic models
|
||||
- Built-in ChatBI interface for *business users* to enter natural language queries
|
||||
- Built-in HeadlessBI interface for *analytics engineers* to build semantic models
|
||||
- Built-in GUI for *system administrators* to manage chat agents and third-party plugins
|
||||
- Support input auto-completion as well as query recommendation
|
||||
- Support multi-turn conversation and history context management
|
||||
@@ -49,7 +49,7 @@ The high-level architecture and main process flow is as follows:
|
||||
SuperSonic comes with sample semantic models as well as chat conversations that can be used as a starting point. Please follow the steps:
|
||||
|
||||
- Download the latest prebuilt binary from the [release page](https://github.com/tencentmusic/supersonic/releases)
|
||||
- Run script "bin/supersonic-daemon.sh" to start services (one java process and one python process)
|
||||
- Run script "assembly/bin/supersonic-daemon.sh start" to start a standalone Java service
|
||||
- Visit http://localhost:9080 in the browser to start exploration
|
||||
|
||||
## Build and Development
|
||||
|
||||
20
README_CN.md
20
README_CN.md
@@ -1,6 +1,6 @@
|
||||
# 超音数(SuperSonic)
|
||||
# SuperSonic (超音数)
|
||||
|
||||
**超音数是一个开箱即用且易于扩展的数据问答对话框架**。通过超音数的问答对话界面,用户能够使用自然语言查询数据,系统会选择合适的可视化图表呈现结果。超音数不需要修改或复制数据,只需要在物理数据模型之上构建逻辑语义模型(指标/维度/实体的定义,以及他们的业务含义、相互间关系等),即可开启数据问答体验。与此同时,超音数被设计为可插拔式的框架,允许以插件形式来扩展新功能,或者将核心组件与其他系统集成。
|
||||
**SuperSonic融合ChatBI和HeadlessBI打造新一代的数据分析平台**。通过SuperSonic的问答对话界面,用户能够使用自然语言查询数据,系统会选择合适的可视化图表呈现结果。SuperSonic不需要修改或复制数据,只需要在物理数据模型之上构建逻辑语义模型(指标/维度/实体的定义,以及他们的业务含义、相互间关系等),即可开启数据问答体验。与此同时,SuperSonic被设计为可插拔的框架,采用Java SPI机制来扩展定制功能。
|
||||
|
||||
<img src="./docs/images/supersonic_demo.gif" height="100%" width="100%" align="center"/>
|
||||
|
||||
@@ -9,24 +9,24 @@
|
||||
大型语言模型(LLMs)如ChatGPT的出现正在重塑信息检索的方式。在数据分析领域,学术界和工业界主要关注利用深度学习模型将自然语言查询转换为SQL查询。虽然一些工作显示出有前景的结果,但它们的可靠性还达不到生产可用的要求。
|
||||
|
||||
在我们看来,为了在实际场景发挥价值,有三个关键点:
|
||||
1. 引入语义模型层,封装底层数据的上下文(关联、公式等),降低SQL生成的**复杂度**。
|
||||
1. 融合HeadlessBI,通过统一语义层封装底层数据细节(关联、键值、公式等),降低SQL生成的**复杂度**。
|
||||
2. 通过一前一后的模式映射器和语义修正器,来缓解LLM常见的**幻觉**现象。
|
||||
3. 设计启发式的规则,在一些特定场景提升语义解析的**效率**。
|
||||
|
||||
为了验证上述想法,我们开发了超音数项目,并将其应用在实际的内部产品中。与此同时,我们将超音数作为一个可扩展的框架开源,希望能够促进数据问答对话领域的进一步发展。
|
||||
为了验证上述想法,我们开发了SuperSonic项目,并将其应用在实际的内部产品中。与此同时,我们将SuperSonic作为一个可扩展的框架开源,希望能够促进数据问答对话领域的进一步发展。
|
||||
|
||||
## 开箱即用的特性
|
||||
|
||||
- 内置对话界面以便*业务用户*输入数据查询。
|
||||
- 内置图形界面以便*分析工程师*构建语义模型。
|
||||
- 内置图形界面以便*系统管理员*管理第三方插件和对话助理。
|
||||
- 内置ChatBI界面以便*业务用户*输入数据查询。
|
||||
- 内置HeadlessBI界面以便*分析工程师*构建语义模型。
|
||||
- 内置图形用户界面以便*系统管理员*管理第三方插件和对话助理。
|
||||
- 支持文本输入的联想和查询问题的推荐。
|
||||
- 支持多轮对话,根据语境自动切换上下文。
|
||||
- 支持四级权限控制:主题域级、模型级、列级、行级。
|
||||
|
||||
## 易于扩展的组件
|
||||
|
||||
超音数的整体架构和主流程如下图所示:
|
||||
SuperSonic的整体架构和主流程如下图所示:
|
||||
|
||||
<img src="./docs/images/supersonic_components.png" height="65%" width="65%" align="center"/>
|
||||
|
||||
@@ -44,10 +44,10 @@
|
||||
|
||||
## 快速体验
|
||||
|
||||
超音数自带样例的语义模型和问答对话,只需以下三步即可快速体验:
|
||||
SuperSonic自带样例的语义模型和问答对话,只需以下三步即可快速体验:
|
||||
|
||||
- 从[release page](https://github.com/tencentmusic/supersonic/releases)下载预先构建好的发行包
|
||||
- 运行 "bin/supersonic-daemon.sh"启动服务(一个Java进程和一个Python进程)
|
||||
- 运行 "assembly/bin/supersonic-daemon.sh start"启动standalone模式的Java服务
|
||||
- 在浏览器访问http://localhost:9080 开启探索
|
||||
|
||||
## 如何构建和部署
|
||||
|
||||
@@ -6,11 +6,18 @@ set "baseDir=%~dp0.."
|
||||
set "buildDir=%baseDir%\build"
|
||||
set "runtimeDir=%baseDir%\..\runtime"
|
||||
set "pip_path=pip3"
|
||||
set "service=%~1"
|
||||
|
||||
|
||||
rem 1. build backend java modules
|
||||
del /q "%buildDir%\*.tar.gz" 2>NUL
|
||||
call mvn -f "%baseDir%\..\pom.xml" clean package -DskipTests
|
||||
|
||||
IF ERRORLEVEL 1 (
|
||||
ECHO Failed to build backend Java modules.
|
||||
EXIT /B 1
|
||||
)
|
||||
|
||||
rem 2. move package to build
|
||||
echo f|xcopy "%baseDir%\..\launchers\standalone\target\*.tar.gz" "%buildDir%\supersonic-standalone.tar.gz"
|
||||
|
||||
@@ -19,6 +26,11 @@ cd "%baseDir%\..\webapp"
|
||||
call start-fe-prod.bat
|
||||
copy /y "%baseDir%\..\webapp\supersonic-webapp.tar.gz" "%buildDir%\"
|
||||
|
||||
IF ERRORLEVEL 1 (
|
||||
ECHO Failed to build frontend webapp.
|
||||
EXIT /B 1
|
||||
)
|
||||
|
||||
rem 4. copy webapp to java classpath
|
||||
cd "%buildDir%"
|
||||
tar -zxvf supersonic-webapp.tar.gz
|
||||
@@ -26,16 +38,23 @@ move supersonic-webapp webapp
|
||||
move webapp ..\..\launchers\standalone\target\classes
|
||||
|
||||
rem 5. build backend python modules
|
||||
echo "start installing python modules with pip: ${pip_path}"
|
||||
set requirementPath="%baseDir%/../chat/core/src/main/python/requirements.txt"
|
||||
%pip_path% install -r %requirementPath%
|
||||
echo "install python modules success"
|
||||
if "%service%"=="pyllm" (
|
||||
echo "start installing python modules with pip: ${pip_path}"
|
||||
set requirementPath="%baseDir%/../chat/python/requirements.txt"
|
||||
%pip_path% install -r %requirementPath%
|
||||
echo "install python modules success"
|
||||
)
|
||||
|
||||
call :BUILD_RUNTIME
|
||||
|
||||
:BUILD_RUNTIME
|
||||
rem 6. reset runtime
|
||||
rd /s /q "%runtimeDir%"
|
||||
IF EXIST "%runtimeDir%" (
|
||||
echo begin to delete dir : %runtimeDir%
|
||||
rd /s /q "%runtimeDir%"
|
||||
) ELSE (
|
||||
echo %runtimeDir% does not exist, create directly
|
||||
)
|
||||
mkdir "%runtimeDir%"
|
||||
tar -zxvf "%buildDir%\supersonic-standalone.tar.gz" -C "%runtimeDir%"
|
||||
for /d %%f in ("%runtimeDir%\launchers-standalone-*") do (
|
||||
|
||||
@@ -4,16 +4,19 @@ set -x
|
||||
sbinDir=$(cd "$(dirname "$0")"; pwd)
|
||||
chmod +x $sbinDir/supersonic-common.sh
|
||||
source $sbinDir/supersonic-common.sh
|
||||
|
||||
cd $baseDir
|
||||
|
||||
service=$1
|
||||
#1. build backend java modules
|
||||
rm -fr ${buildDir}/*.tar.gz
|
||||
rm -fr dist
|
||||
|
||||
set +x
|
||||
|
||||
mvn -f $baseDir/../ clean package -DskipTests
|
||||
# check build result
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Failed to build backend Java modules."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#2. move package to build
|
||||
cp $baseDir/../launchers/semantic/target/*.tar.gz ${buildDir}/supersonic-semantic.tar.gz
|
||||
@@ -26,6 +29,11 @@ cd ../webapp
|
||||
sh ./start-fe-prod.sh
|
||||
cp -fr ./supersonic-webapp.tar.gz ${buildDir}/
|
||||
|
||||
# check build result
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Failed to build frontend webapp."
|
||||
exit 1
|
||||
fi
|
||||
#4. copy webapp to java classpath
|
||||
cd $buildDir
|
||||
tar xvf supersonic-webapp.tar.gz
|
||||
@@ -36,13 +44,15 @@ cp -fr webapp ../../launchers/standalone/target/classes
|
||||
rm -fr ${buildDir}/webapp
|
||||
|
||||
#5. build backend python modules
|
||||
echo "start installing python modules with pip: ${pip_path}"
|
||||
requirementPath=$baseDir/../chat/core/src/main/python/requirements.txt
|
||||
${pip_path} install -r ${requirementPath}
|
||||
echo "install python modules success"
|
||||
if [ "$service" == "pyllm" ]; then
|
||||
echo "start installing python modules with pip: ${pip_path}"
|
||||
requirementPath=$baseDir/../chat/python/requirements.txt
|
||||
${pip_path} install -r ${requirementPath}
|
||||
echo "install python modules success"
|
||||
fi
|
||||
|
||||
#6. reset runtime
|
||||
rm -fr $runtimeDir/*
|
||||
rm -fr $runtimeDir/supersonic*
|
||||
moveAllToRuntime
|
||||
setEnvToWeb chat
|
||||
setEnvToWeb semantic
|
||||
|
||||
@@ -11,14 +11,14 @@ buildDir=$baseDir/build
|
||||
|
||||
readonly CHAT_APP_NAME="supersonic_chat"
|
||||
readonly SEMANTIC_APP_NAME="supersonic_semantic"
|
||||
readonly LLMPARSER_APP_NAME="supersonic_llmparser"
|
||||
readonly PYLLM_APP_NAME="supersonic_pyllm"
|
||||
readonly STANDALONE_APP_NAME="supersonic_standalone"
|
||||
readonly CHAT_SERVICE="chat"
|
||||
readonly SEMANTIC_SERVICE="semantic"
|
||||
readonly LLMPARSER_SERVICE="llmparser"
|
||||
readonly PYLLM_SERVICE="pyllm"
|
||||
readonly STANDALONE_SERVICE="standalone"
|
||||
readonly LLMPARSER_HOST="127.0.0.1"
|
||||
readonly LLMPARSER_PORT="9092"
|
||||
readonly PYLLM_HOST="127.0.0.1"
|
||||
readonly PYLLM_PORT="9092"
|
||||
|
||||
function setEnvToWeb {
|
||||
model_name=$1
|
||||
@@ -29,11 +29,15 @@ function setEnvToWeb {
|
||||
|
||||
function moveToRuntime {
|
||||
model_name=$1
|
||||
tar -zxvf ${buildDir}/supersonic-${model_name}.tar.gz -C ${runtimeDir}
|
||||
mv ${runtimeDir}/launchers-${model_name}-* ${runtimeDir}/supersonic-${model_name}
|
||||
|
||||
mkdir -p ${runtimeDir}/supersonic-${model_name}/webapp
|
||||
cp -fr ${buildDir}/webapp/* ${runtimeDir}/supersonic-${model_name}/webapp
|
||||
file="${buildDir}/supersonic-${model_name}.tar.gz"
|
||||
if [ -f "$file" ]; then
|
||||
tar -zxvf "$file" -C ${runtimeDir}
|
||||
mv ${runtimeDir}/launchers-${model_name}-* ${runtimeDir}/supersonic-${model_name}
|
||||
mkdir -p ${runtimeDir}/supersonic-${model_name}/webapp
|
||||
cp -fr ${buildDir}/webapp/* ${runtimeDir}/supersonic-${model_name}/webapp
|
||||
else
|
||||
echo "File $file does not exist. Skipping the move to runtime."
|
||||
fi
|
||||
}
|
||||
|
||||
function moveAllToRuntime {
|
||||
@@ -81,25 +85,26 @@ function runJavaService {
|
||||
|
||||
# run python service
|
||||
function runPythonService {
|
||||
pythonRunDir=${runtimeDir}/supersonic-${model_name}/llmparser
|
||||
pythonRunDir=${runtimeDir}/supersonic-${model_name}/pyllm
|
||||
cd $pythonRunDir
|
||||
nohup ${python_path} supersonic_llmparser.py > $pythonRunDir/llmparser.log 2>&1 &
|
||||
nohup ${python_path} supersonic_pyllm.py > $pythonRunDir/pyllm.log 2>&1 &
|
||||
# add health check
|
||||
for i in {1..10}
|
||||
do
|
||||
echo "llmparser health check attempt $i..."
|
||||
response=$(curl -s http://${LLMPARSER_HOST}:${LLMPARSER_PORT}/health)
|
||||
echo "llmparser health check response: $response"
|
||||
echo "pyllm health check attempt $i..."
|
||||
response=$(curl -s http://${PYLLM_HOST}:${PYLLM_PORT}/health)
|
||||
echo "pyllm health check response: $response"
|
||||
status_ok="Healthy"
|
||||
if [[ $response == *$status_ok* ]] ; then
|
||||
echo "llmparser Health check passed."
|
||||
echo "pyllm Health check passed."
|
||||
break
|
||||
else
|
||||
if [ "$i" -eq 10 ]; then
|
||||
echo "llmparser Health check failed after 10 attempts. Exiting."
|
||||
echo "pyllm Health check failed after 10 attempts."
|
||||
echo "May still downloading model files. Please check pyllm.log in runtime directory."
|
||||
fi
|
||||
echo "Retrying after 5 seconds..."
|
||||
sleep 5
|
||||
fi
|
||||
done
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,10 +9,10 @@ set "main_class=com.tencent.supersonic.StandaloneLauncher"
|
||||
set "python_path=python"
|
||||
set "pip_path=pip3"
|
||||
set "standalone_service=standalone"
|
||||
set "llmparser_service=llmparser"
|
||||
set "pyllm_service=pyllm"
|
||||
|
||||
set "javaRunDir=%runtimeDir%\supersonic-standalone"
|
||||
set "pythonRunDir=%runtimeDir%\supersonic-standalone\llmparser"
|
||||
set "pythonRunDir=%runtimeDir%\supersonic-standalone\pyllm"
|
||||
|
||||
set "command=%~1"
|
||||
set "service=%~2"
|
||||
@@ -21,6 +21,10 @@ if "%service%"=="" (
|
||||
set "service=%standalone_service%"
|
||||
)
|
||||
|
||||
IF "%service%"=="pyllm" (
|
||||
SET "llmProxy=PythonLLMProxy"
|
||||
)
|
||||
|
||||
call :BUILD_RUNTIME
|
||||
|
||||
if "%command%"=="restart" (
|
||||
@@ -42,27 +46,23 @@ if "%command%"=="restart" (
|
||||
)
|
||||
|
||||
:START
|
||||
if "%service%"=="%llmparser_service%" (
|
||||
if "%service%"=="%pyllm_service%" (
|
||||
call :START_PYTHON
|
||||
call :START_JAVA
|
||||
goto :EOF
|
||||
)
|
||||
call :START_PYTHON
|
||||
call :START_JAVA
|
||||
goto :EOF
|
||||
|
||||
:STOP
|
||||
if "%service%"=="%llmparser_service%" (
|
||||
call :STOP_PYTHON
|
||||
goto :EOF
|
||||
)
|
||||
call :STOP_PYTHON
|
||||
call :STOP_JAVA
|
||||
goto :EOF
|
||||
|
||||
:START_PYTHON
|
||||
echo 'python service starting, see logs in llmparser/llmparser.log'
|
||||
echo 'python service starting, see logs in pyllm/pyllm.log'
|
||||
cd "%pythonRunDir%"
|
||||
start /B %python_path% supersonic_llmparser.py > %pythonRunDir%\llmparser.log 2>&1
|
||||
start /B %python_path% supersonic_pyllm.py > %pythonRunDir%\pyllm.log 2>&1
|
||||
timeout /t 10 >nul
|
||||
echo 'python service started'
|
||||
goto :EOF
|
||||
@@ -71,9 +71,9 @@ if "%command%"=="restart" (
|
||||
echo 'java service starting, see logs in logs/'
|
||||
cd "%javaRunDir%"
|
||||
if not exist "%runtimeDir%\supersonic-standalone\logs" mkdir "%runtimeDir%\supersonic-standalone\logs"
|
||||
set "libDir=%runtimeDir%\supersonic-%service%\lib"
|
||||
set "confDir=%runtimeDir%\supersonic-%service%\conf"
|
||||
set "webDir=%runtimeDir%\supersonic-%service%\webapp"
|
||||
set "libDir=%runtimeDir%\supersonic-standalone\lib"
|
||||
set "confDir=%runtimeDir%\supersonic-standalone\conf"
|
||||
set "webDir=%runtimeDir%\supersonic-standalone\webapp"
|
||||
set "classpath=%confDir%;%webDir%;%libDir%\*"
|
||||
set "java-command=-Dfile.encoding=UTF-8 -Duser.language=Zh -Duser.region=CN -Duser.timezone=GMT+08 -Xms1024m -Xmx2048m -cp %CLASSPATH% %MAIN_CLASS%"
|
||||
start /B java %java-command% >nul 2>&1
|
||||
@@ -96,7 +96,7 @@ if "%command%"=="restart" (
|
||||
goto :EOF
|
||||
|
||||
:RELOAD_EXAMPLE
|
||||
cd "%runtimeDir%\supersonic-standalone\llmparser\sql"
|
||||
cd "%runtimeDir%\supersonic-standalone\pyllm\sql"
|
||||
start %python_path% examples_reload_run.py
|
||||
goto :EOF
|
||||
|
||||
|
||||
@@ -22,8 +22,9 @@ app_name=$STANDALONE_APP_NAME
|
||||
main_class="com.tencent.supersonic.StandaloneLauncher"
|
||||
model_name=$service
|
||||
|
||||
if [ "$service" == "llmparser" ]; then
|
||||
if [ "$service" == "pyllm" ]; then
|
||||
model_name=${STANDALONE_SERVICE}
|
||||
export llmProxy=PythonLLMProxy
|
||||
fi
|
||||
|
||||
cd $baseDir
|
||||
@@ -43,14 +44,14 @@ function setAppName {
|
||||
app_name=$CHAT_APP_NAME
|
||||
elif [ "$service" == $SEMANTIC_SERVICE ]; then
|
||||
app_name=$SEMANTIC_APP_NAME
|
||||
elif [ "$service" == $LLMPARSER_SERVICE ]; then
|
||||
app_name=$LLMPARSER_APP_NAME
|
||||
elif [ "$service" == $PYLLM_SERVICE ]; then
|
||||
app_name=$PYLLM_APP_NAME
|
||||
fi
|
||||
}
|
||||
setAppName
|
||||
|
||||
function reloadExamples {
|
||||
pythonRunDir=${runtimeDir}/supersonic-${model_name}/llmparser
|
||||
pythonRunDir=${runtimeDir}/supersonic-${model_name}/pyllm
|
||||
cd $pythonRunDir/sql
|
||||
${python_path} examples_reload_run.py
|
||||
}
|
||||
@@ -61,7 +62,7 @@ function start()
|
||||
local_app_name=$1
|
||||
pid=$(ps aux |grep ${local_app_name} | grep -v grep | awk '{print $2}')
|
||||
if [[ "$pid" == "" ]]; then
|
||||
if [[ ${local_app_name} == $LLMPARSER_APP_NAME ]]; then
|
||||
if [[ ${local_app_name} == $PYLLM_APP_NAME ]]; then
|
||||
runPythonService ${local_app_name}
|
||||
else
|
||||
runJavaService ${local_app_name}
|
||||
@@ -87,7 +88,7 @@ function stop()
|
||||
|
||||
function reload()
|
||||
{
|
||||
if [[ $1 == $LLMPARSER_APP_NAME ]]; then
|
||||
if [[ $1 == $PYLLM_APP_NAME ]]; then
|
||||
reloadExamples
|
||||
fi
|
||||
}
|
||||
@@ -95,11 +96,11 @@ function reload()
|
||||
# 4. execute command operation
|
||||
case "$command" in
|
||||
start)
|
||||
if [ "$service" == $STANDALONE_SERVICE ]; then
|
||||
echo "Starting $LLMPARSER_APP_NAME"
|
||||
start $LLMPARSER_APP_NAME
|
||||
if [ "$service" == $PYLLM_SERVICE ]; then
|
||||
echo "Starting $app_name"
|
||||
start $app_name
|
||||
echo "Starting $STANDALONE_APP_NAME"
|
||||
start $STANDALONE_APP_NAME
|
||||
else
|
||||
echo "Starting $app_name"
|
||||
start $app_name
|
||||
@@ -107,15 +108,10 @@ case "$command" in
|
||||
echo "Start success"
|
||||
;;
|
||||
stop)
|
||||
if [ "$service" == $STANDALONE_SERVICE ]; then
|
||||
echo "Stopping $LLMPARSER_APP_NAME"
|
||||
stop $LLMPARSER_APP_NAME
|
||||
echo "Stopping $app_name"
|
||||
stop $app_name
|
||||
else
|
||||
echo "Stopping $app_name"
|
||||
stop ${app_name}
|
||||
fi
|
||||
echo "Stopping $app_name"
|
||||
stop $app_name
|
||||
echo "Stopping $PYLLM_APP_NAME"
|
||||
stop $PYLLM_APP_NAME
|
||||
echo "Stop success"
|
||||
;;
|
||||
reload)
|
||||
@@ -124,15 +120,15 @@ case "$command" in
|
||||
echo "Reload success"
|
||||
;;
|
||||
restart)
|
||||
if [ "$service" == $STANDALONE_SERVICE ]; then
|
||||
if [ "$service" == $PYLLM_SERVICE ]; then
|
||||
echo "Stopping ${app_name}"
|
||||
stop ${app_name}
|
||||
echo "Stopping ${LLMPARSER_APP_NAME}"
|
||||
stop $LLMPARSER_APP_NAME
|
||||
echo "Starting ${LLMPARSER_APP_NAME}"
|
||||
start $LLMPARSER_APP_NAME
|
||||
echo "Stopping ${STANDALONE_APP_NAME}"
|
||||
stop $STANDALONE_APP_NAME
|
||||
echo "Starting ${app_name}"
|
||||
start ${app_name}
|
||||
echo "Starting ${STANDALONE_APP_NAME}"
|
||||
start $STANDALONE_APP_NAME
|
||||
else
|
||||
echo "Stopping ${app_name}"
|
||||
stop ${app_name}
|
||||
|
||||
@@ -21,8 +21,8 @@
|
||||
</includes>
|
||||
</fileSet>
|
||||
<fileSet>
|
||||
<directory>${project.basedir}/../../chat/core/src/main/python</directory>
|
||||
<outputDirectory>llmparser</outputDirectory>
|
||||
<directory>${project.basedir}/../../chat/python</directory>
|
||||
<outputDirectory>pyllm</outputDirectory>
|
||||
<fileMode>0777</fileMode>
|
||||
<directoryMode>0755</directoryMode>
|
||||
</fileSet>
|
||||
|
||||
@@ -7,6 +7,9 @@ import com.tencent.supersonic.auth.api.authentication.request.UserReq;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* UserAdaptor defines some interfaces for obtaining user and organization information
|
||||
*/
|
||||
public interface UserAdaptor {
|
||||
|
||||
List<String> getUserNames();
|
||||
|
||||
@@ -6,7 +6,7 @@ import lombok.Data;
|
||||
@Data
|
||||
public class AuthGroup {
|
||||
|
||||
private String modelId;
|
||||
private Long modelId;
|
||||
private String name;
|
||||
private Integer groupId;
|
||||
private List<AuthRule> authRules;
|
||||
|
||||
@@ -7,13 +7,13 @@ import lombok.ToString;
|
||||
@ToString
|
||||
public class AuthRes {
|
||||
|
||||
private String modelId;
|
||||
private Long modelId;
|
||||
private String name;
|
||||
|
||||
public AuthRes() {
|
||||
}
|
||||
|
||||
public AuthRes(String modelId, String name) {
|
||||
public AuthRes(Long modelId, String name) {
|
||||
this.modelId = modelId;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package com.tencent.supersonic.auth.api.authorization.request;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.auth.api.authorization.pojo.AuthRes;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.ToString;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@ToString
|
||||
@@ -15,5 +17,17 @@ public class QueryAuthResReq {
|
||||
|
||||
private List<AuthRes> resources;
|
||||
|
||||
private String modelId;
|
||||
private Long modelId;
|
||||
|
||||
private List<Long> modelIds;
|
||||
|
||||
public List<Long> getModelIds() {
|
||||
if (!CollectionUtils.isEmpty(modelIds)) {
|
||||
return modelIds;
|
||||
}
|
||||
if (modelId != null) {
|
||||
return Lists.newArrayList(modelId);
|
||||
}
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,12 +33,6 @@
|
||||
<artifactId>spring-boot-starter-jdbc</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>com.alibaba</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
@@ -52,12 +46,7 @@
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis-spring</artifactId>
|
||||
<version>${mybatis-spring.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.pagehelper</groupId>
|
||||
<artifactId>pagehelper</artifactId>
|
||||
|
||||
@@ -16,6 +16,9 @@ import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* DefaultUserAdaptor provides a default method to obtain user and organization information
|
||||
*/
|
||||
public class DefaultUserAdaptor implements UserAdaptor {
|
||||
|
||||
private List<UserDO> getUserDOList() {
|
||||
|
||||
@@ -29,7 +29,6 @@ public abstract class AuthenticationInterceptor implements HandlerInterceptor {
|
||||
|
||||
protected S2ThreadContext s2ThreadContext;
|
||||
|
||||
|
||||
protected boolean isExcludedUri(String uri) {
|
||||
String excludePathStr = authenticationConfig.getExcludePath();
|
||||
if (Strings.isEmpty(excludePathStr)) {
|
||||
@@ -59,7 +58,6 @@ public abstract class AuthenticationInterceptor implements HandlerInterceptor {
|
||||
return "true".equalsIgnoreCase(internal);
|
||||
}
|
||||
|
||||
|
||||
protected void reflectSetparam(HttpServletRequest request, String key, String value) {
|
||||
try {
|
||||
if (request instanceof StandardMultipartHttpServletRequest) {
|
||||
|
||||
@@ -76,5 +76,4 @@ public class DefaultAuthenticationInterceptor extends AuthenticationInterceptor
|
||||
s2ThreadContext.set(threadContext);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -20,7 +20,6 @@ public class UserRepositoryImpl implements UserRepository {
|
||||
this.userDOMapper = userDOMapper;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<UserDO> getUserList() {
|
||||
return userDOMapper.selectByExample(new UserDOExample());
|
||||
@@ -40,5 +39,4 @@ public class UserRepositoryImpl implements UserRepository {
|
||||
return userDOOptional.orElse(null);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -29,7 +29,6 @@ public class UserController {
|
||||
this.userService = userService;
|
||||
}
|
||||
|
||||
|
||||
@GetMapping("/getCurrentUser")
|
||||
public User getCurrentUser(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) {
|
||||
return UserHolder.findUser(httpServletRequest, httpServletResponse);
|
||||
@@ -70,5 +69,4 @@ public class UserController {
|
||||
return userService.login(userCmd);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -13,7 +13,6 @@ import org.springframework.stereotype.Service;
|
||||
@Service
|
||||
public class UserServiceImpl implements UserService {
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getUserNames() {
|
||||
return ComponentFactory.getUserAdaptor().getUserNames();
|
||||
|
||||
@@ -20,5 +20,4 @@ public class FakeUserStrategy implements UserStrategy {
|
||||
return User.getFakeUser();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -58,7 +58,6 @@ public class UserTokenUtils {
|
||||
return generate(claims);
|
||||
}
|
||||
|
||||
|
||||
public User getUser(HttpServletRequest request) {
|
||||
String token = request.getHeader(authenticationConfig.getTokenHttpHeaderKey());
|
||||
final Claims claims = getClaims(token);
|
||||
@@ -120,5 +119,4 @@ public class UserTokenUtils {
|
||||
.compact();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package com.tencent.supersonic.auth.authorization.application;
|
||||
package com.tencent.supersonic.auth.authorization.service;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.gson.Gson;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.auth.api.authentication.service.UserService;
|
||||
@@ -13,7 +14,6 @@ import com.tencent.supersonic.auth.api.authorization.service.AuthService;
|
||||
import com.tencent.supersonic.auth.api.authorization.pojo.AuthGroup;
|
||||
import com.tencent.supersonic.auth.api.authorization.pojo.AuthRule;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
@@ -48,7 +48,7 @@ public class AuthServiceImpl implements AuthService {
|
||||
public List<AuthGroup> queryAuthGroups(String modelId, Integer groupId) {
|
||||
return load().stream()
|
||||
.filter(group -> (Objects.isNull(groupId) || groupId.equals(group.getGroupId()))
|
||||
&& modelId.equals(group.getModelId()))
|
||||
&& modelId.equals(group.getModelId().toString()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@@ -76,21 +76,17 @@ public class AuthServiceImpl implements AuthService {
|
||||
jdbcTemplate.update("delete from s2_auth_groups where group_id = ?", group.getGroupId());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public AuthorizedResourceResp queryAuthorizedResources(QueryAuthResReq req, User user) {
|
||||
Set<String> userOrgIds = userService.getUserAllOrgId(user.getName());
|
||||
if (!CollectionUtils.isEmpty(userOrgIds)) {
|
||||
req.setDepartmentIds(new ArrayList<>(userOrgIds));
|
||||
}
|
||||
List<AuthGroup> groups = getAuthGroups(req, user.getName());
|
||||
List<AuthGroup> groups = getAuthGroups(req.getModelIds(), user.getName(), new ArrayList<>(userOrgIds));
|
||||
AuthorizedResourceResp resource = new AuthorizedResourceResp();
|
||||
Map<String, List<AuthGroup>> authGroupsByModelId = groups.stream()
|
||||
Map<Long, List<AuthGroup>> authGroupsByModelId = groups.stream()
|
||||
.collect(Collectors.groupingBy(AuthGroup::getModelId));
|
||||
Map<String, List<AuthRes>> reqAuthRes = req.getResources().stream()
|
||||
Map<Long, List<AuthRes>> reqAuthRes = req.getResources().stream()
|
||||
.collect(Collectors.groupingBy(AuthRes::getModelId));
|
||||
|
||||
for (String modelId : reqAuthRes.keySet()) {
|
||||
for (Long modelId : reqAuthRes.keySet()) {
|
||||
List<AuthRes> reqResourcesList = reqAuthRes.get(modelId);
|
||||
AuthResGrp rg = new AuthResGrp();
|
||||
if (authGroupsByModelId.containsKey(modelId)) {
|
||||
@@ -113,8 +109,11 @@ public class AuthServiceImpl implements AuthService {
|
||||
}
|
||||
}
|
||||
|
||||
if (StringUtils.isNotEmpty(req.getModelId())) {
|
||||
List<AuthGroup> authGroups = authGroupsByModelId.get(req.getModelId());
|
||||
if (!CollectionUtils.isEmpty(req.getModelIds())) {
|
||||
List<AuthGroup> authGroups = Lists.newArrayList();
|
||||
for (Long modelId : authGroupsByModelId.keySet()) {
|
||||
authGroups.addAll(authGroupsByModelId.getOrDefault(modelId, Lists.newArrayList()));
|
||||
}
|
||||
if (!CollectionUtils.isEmpty(authGroups)) {
|
||||
for (AuthGroup group : authGroups) {
|
||||
if (group.getDimensionFilters() != null
|
||||
@@ -130,17 +129,17 @@ public class AuthServiceImpl implements AuthService {
|
||||
return resource;
|
||||
}
|
||||
|
||||
private List<AuthGroup> getAuthGroups(QueryAuthResReq req, String userName) {
|
||||
private List<AuthGroup> getAuthGroups(List<Long> modelIds, String userName, List<String> departmentIds) {
|
||||
List<AuthGroup> groups = load().stream()
|
||||
.filter(group -> {
|
||||
if (!Objects.equals(group.getModelId(), req.getModelId())) {
|
||||
if (CollectionUtils.isEmpty(modelIds) || !modelIds.contains(group.getModelId())) {
|
||||
return false;
|
||||
}
|
||||
if (!CollectionUtils.isEmpty(group.getAuthorizedUsers()) && group.getAuthorizedUsers()
|
||||
.contains(userName)) {
|
||||
return true;
|
||||
}
|
||||
for (String departmentId : req.getDepartmentIds()) {
|
||||
for (String departmentId : departmentIds) {
|
||||
if (!CollectionUtils.isEmpty(group.getAuthorizedDepartmentIds())
|
||||
&& group.getAuthorizedDepartmentIds().contains(departmentId)) {
|
||||
return true;
|
||||
@@ -148,7 +147,7 @@ public class AuthServiceImpl implements AuthService {
|
||||
}
|
||||
return false;
|
||||
}).collect(Collectors.toList());
|
||||
log.info("user:{} department:{} authGroups:{}", userName, req.getDepartmentIds(), groups);
|
||||
log.info("user:{} department:{} authGroups:{}", userName, departmentIds, groups);
|
||||
return groups;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.tencent.supersonic.chat.api.component;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import net.sf.jsqlparser.JSQLParserException;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
|
||||
/**
|
||||
* A semantic corrector checks validity of extracted semantic information and
|
||||
@@ -9,5 +9,5 @@ import net.sf.jsqlparser.JSQLParserException;
|
||||
*/
|
||||
public interface SemanticCorrector {
|
||||
|
||||
void correct(SemanticCorrectInfo semanticCorrectInfo) throws JSQLParserException;
|
||||
void correct(QueryReq queryReq, SemanticParseInfo semanticParseInfo);
|
||||
}
|
||||
|
||||
@@ -9,12 +9,13 @@ import com.tencent.supersonic.semantic.api.model.request.PageMetricReq;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DomainResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.DimensionResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ExplainResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.MetricResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ModelSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.model.response.QueryResultWithSchemaResp;
|
||||
import com.tencent.supersonic.semantic.api.query.request.ExplainSqlReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryDimValueReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryDslReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryS2SQLReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryMultiStructReq;
|
||||
import com.tencent.supersonic.semantic.api.query.request.QueryStructReq;
|
||||
|
||||
@@ -37,7 +38,7 @@ public interface SemanticInterpreter {
|
||||
|
||||
QueryResultWithSchemaResp queryByMultiStruct(QueryMultiStructReq queryMultiStructReq, User user);
|
||||
|
||||
QueryResultWithSchemaResp queryByDsl(QueryDslReq queryDslReq, User user);
|
||||
QueryResultWithSchemaResp queryByS2SQL(QueryS2SQLReq queryS2SQLReq, User user);
|
||||
|
||||
QueryResultWithSchemaResp queryDimValue(QueryDimValueReq queryDimValueReq, User user);
|
||||
|
||||
@@ -47,9 +48,9 @@ public interface SemanticInterpreter {
|
||||
|
||||
ModelSchema getModelSchema(Long model, Boolean cacheEnable);
|
||||
|
||||
PageInfo<DimensionResp> getDimensionPage(PageDimensionReq pageDimensionCmd);
|
||||
PageInfo<DimensionResp> getDimensionPage(PageDimensionReq pageDimensionReq);
|
||||
|
||||
PageInfo<MetricResp> getMetricPage(PageMetricReq pageMetricCmd, User user);
|
||||
PageInfo<MetricResp> getMetricPage(PageMetricReq pageDimensionReq, User user);
|
||||
|
||||
List<DomainResp> getDomainList(User user);
|
||||
|
||||
@@ -57,4 +58,6 @@ public interface SemanticInterpreter {
|
||||
|
||||
<T> ExplainResp explain(ExplainSqlReq<T> explainSqlReq, User user) throws Exception;
|
||||
|
||||
List<ModelSchemaResp> fetchModelSchema(List<Long> ids, Boolean cacheEnable);
|
||||
|
||||
}
|
||||
|
||||
@@ -3,7 +3,6 @@ package com.tencent.supersonic.chat.api.component;
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.response.QueryResult;
|
||||
import com.tencent.supersonic.semantic.api.model.response.ExplainResp;
|
||||
import org.apache.calcite.sql.parser.SqlParseException;
|
||||
|
||||
/**
|
||||
@@ -15,7 +14,9 @@ public interface SemanticQuery {
|
||||
|
||||
QueryResult execute(User user) throws SqlParseException;
|
||||
|
||||
ExplainResp explain(User user);
|
||||
void initS2Sql(User user);
|
||||
|
||||
String explain(User user);
|
||||
|
||||
SemanticParseInfo getParseInfo();
|
||||
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
package com.tencent.supersonic.chat.api.pojo;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import com.tencent.supersonic.common.pojo.ModelRela;
|
||||
import lombok.Data;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
|
||||
@@ -13,7 +18,9 @@ public class ModelSchema {
|
||||
private Set<SchemaElement> metrics = new HashSet<>();
|
||||
private Set<SchemaElement> dimensions = new HashSet<>();
|
||||
private Set<SchemaElement> dimensionValues = new HashSet<>();
|
||||
private Set<SchemaElement> tags = new HashSet<>();
|
||||
private SchemaElement entity = new SchemaElement();
|
||||
private List<ModelRela> modelRelas = new ArrayList<>();
|
||||
|
||||
public SchemaElement getElement(SchemaElementType elementType, long elementID) {
|
||||
Optional<SchemaElement> element = Optional.empty();
|
||||
@@ -34,6 +41,9 @@ public class ModelSchema {
|
||||
case VALUE:
|
||||
element = dimensionValues.stream().filter(e -> e.getId() == elementID).findFirst();
|
||||
break;
|
||||
case TAG:
|
||||
element = tags.stream().filter(e -> e.getId() == elementID).findFirst();
|
||||
break;
|
||||
default:
|
||||
}
|
||||
|
||||
@@ -44,4 +54,45 @@ public class ModelSchema {
|
||||
}
|
||||
}
|
||||
|
||||
public SchemaElement getElement(SchemaElementType elementType, String name) {
|
||||
Optional<SchemaElement> element = Optional.empty();
|
||||
|
||||
switch (elementType) {
|
||||
case ENTITY:
|
||||
element = Optional.ofNullable(entity);
|
||||
break;
|
||||
case MODEL:
|
||||
element = Optional.of(model);
|
||||
break;
|
||||
case METRIC:
|
||||
element = metrics.stream().filter(e -> name.equals(e.getName())).findFirst();
|
||||
break;
|
||||
case DIMENSION:
|
||||
element = dimensions.stream().filter(e -> name.equals(e.getName())).findFirst();
|
||||
break;
|
||||
case VALUE:
|
||||
element = dimensionValues.stream().filter(e -> name.equals(e.getName())).findFirst();
|
||||
break;
|
||||
default:
|
||||
}
|
||||
|
||||
if (element.isPresent()) {
|
||||
return element.get();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Set<Long> getModelClusterSet() {
|
||||
if (CollectionUtils.isEmpty(modelRelas)) {
|
||||
return Sets.newHashSet();
|
||||
}
|
||||
Set<Long> modelClusterSet = new HashSet<>();
|
||||
modelRelas.forEach(modelRela -> {
|
||||
modelClusterSet.add(modelRela.getToModelId());
|
||||
modelClusterSet.add(modelRela.getFromModelId());
|
||||
});
|
||||
return modelClusterSet;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ public class QueryContext {
|
||||
private QueryReq request;
|
||||
private List<SemanticQuery> candidateQueries = new ArrayList<>();
|
||||
private SchemaMapInfo mapInfo = new SchemaMapInfo();
|
||||
private SchemaModelClusterMapInfo modelClusterMapInfo = new SchemaModelClusterMapInfo();
|
||||
|
||||
public QueryContext(QueryReq request) {
|
||||
this.request = request;
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.tencent.supersonic.chat.api.pojo;
|
||||
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class RelatedSchemaElement {
|
||||
|
||||
private Long dimensionId;
|
||||
|
||||
private boolean isNecessary;
|
||||
|
||||
}
|
||||
@@ -1,14 +1,15 @@
|
||||
package com.tencent.supersonic.chat.api.pojo;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@Getter
|
||||
@Builder
|
||||
@@ -22,13 +23,14 @@ public class SchemaElement implements Serializable {
|
||||
private String bizName;
|
||||
private Long useCnt;
|
||||
private SchemaElementType type;
|
||||
|
||||
private List<String> alias;
|
||||
|
||||
private List<SchemaValueMap> schemaValueMaps;
|
||||
private List<RelatedSchemaElement> relatedSchemaElements;
|
||||
|
||||
private String defaultAgg;
|
||||
|
||||
private double order;
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
@@ -40,13 +42,13 @@ public class SchemaElement implements Serializable {
|
||||
SchemaElement schemaElement = (SchemaElement) o;
|
||||
return Objects.equal(model, schemaElement.model) && Objects.equal(id,
|
||||
schemaElement.id) && Objects.equal(name, schemaElement.name)
|
||||
&& Objects.equal(bizName, schemaElement.bizName) && Objects.equal(
|
||||
useCnt, schemaElement.useCnt) && Objects.equal(type, schemaElement.type);
|
||||
&& Objects.equal(bizName, schemaElement.bizName)
|
||||
&& Objects.equal(type, schemaElement.type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hashCode(model, id, name, bizName, useCnt, type);
|
||||
return Objects.hashCode(model, id, name, bizName, type);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ public enum SchemaElementType {
|
||||
DIMENSION,
|
||||
VALUE,
|
||||
ENTITY,
|
||||
TAG,
|
||||
ID,
|
||||
DATE
|
||||
}
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
package com.tencent.supersonic.chat.api.pojo;
|
||||
|
||||
import com.clickhouse.client.internal.apache.commons.compress.utils.Lists;
|
||||
import com.tencent.supersonic.common.pojo.ModelCluster;
|
||||
import lombok.Data;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
@Data
|
||||
public class SchemaModelClusterMapInfo {
|
||||
|
||||
private Map<String, List<SchemaElementMatch>> modelElementMatches = new HashMap<>();
|
||||
|
||||
public Set<String> getMatchedModelClusters() {
|
||||
return modelElementMatches.keySet();
|
||||
}
|
||||
|
||||
public List<SchemaElementMatch> getMatchedElements(Long modelId) {
|
||||
for (String key : modelElementMatches.keySet()) {
|
||||
if (ModelCluster.getModelIdFromKey(key).contains(modelId)) {
|
||||
return modelElementMatches.get(key);
|
||||
}
|
||||
}
|
||||
return Lists.newArrayList();
|
||||
}
|
||||
|
||||
public List<SchemaElementMatch> getMatchedElements(String modelCluster) {
|
||||
return modelElementMatches.get(modelCluster);
|
||||
}
|
||||
|
||||
public Map<String, List<SchemaElementMatch>> getModelElementMatches() {
|
||||
return modelElementMatches;
|
||||
}
|
||||
|
||||
public Map<String, List<SchemaElementMatch>> getElementMatchesByModelIds(Set<Long> modelIds) {
|
||||
if (CollectionUtils.isEmpty(modelIds)) {
|
||||
return modelElementMatches;
|
||||
}
|
||||
Map<String, List<SchemaElementMatch>> modelElementMatchesFiltered = new HashMap<>();
|
||||
for (String key : modelElementMatches.keySet()) {
|
||||
for (Long modelId : modelIds) {
|
||||
if (ModelCluster.getModelIdFromKey(key).contains(modelId)) {
|
||||
modelElementMatchesFiltered.put(key, modelElementMatches.get(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
return modelElementMatchesFiltered;
|
||||
}
|
||||
|
||||
public void setModelElementMatches(Map<String, List<SchemaElementMatch>> modelElementMatches) {
|
||||
this.modelElementMatches = modelElementMatches;
|
||||
}
|
||||
|
||||
public void setMatchedElements(String modelCluster, List<SchemaElementMatch> elementMatches) {
|
||||
modelElementMatches.put(modelCluster, elementMatches);
|
||||
}
|
||||
}
|
||||
@@ -5,8 +5,13 @@ import com.tencent.supersonic.chat.api.pojo.request.QueryFilter;
|
||||
import com.tencent.supersonic.chat.api.pojo.response.EntityInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.response.SqlInfo;
|
||||
import com.tencent.supersonic.common.pojo.DateConf;
|
||||
import com.tencent.supersonic.common.pojo.ModelCluster;
|
||||
import com.tencent.supersonic.common.pojo.Order;
|
||||
import com.tencent.supersonic.common.pojo.enums.QueryType;
|
||||
import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.FilterType;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
@@ -15,15 +20,13 @@ import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import com.tencent.supersonic.common.pojo.enums.FilterType;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class SemanticParseInfo {
|
||||
|
||||
private Integer id;
|
||||
private String queryMode;
|
||||
private SchemaElement model;
|
||||
private ModelCluster model = new ModelCluster();
|
||||
private Set<SchemaElement> metrics = new TreeSet<>(new SchemaNameLengthComparator());
|
||||
private Set<SchemaElement> dimensions = new LinkedHashSet();
|
||||
private SchemaElement entity;
|
||||
@@ -34,25 +37,38 @@ public class SemanticParseInfo {
|
||||
private Set<Order> orders = new LinkedHashSet();
|
||||
private DateConf dateInfo;
|
||||
private Long limit;
|
||||
private Boolean nativeQuery = false;
|
||||
private double score;
|
||||
private List<SchemaElementMatch> elementMatches = new ArrayList<>();
|
||||
private Map<String, Object> properties = new HashMap<>();
|
||||
private EntityInfo entityInfo;
|
||||
private SqlInfo sqlInfo = new SqlInfo();
|
||||
private QueryType queryType = QueryType.ID;
|
||||
|
||||
public Long getModelId() {
|
||||
return model != null ? model.getId() : 0L;
|
||||
public String getModelClusterKey() {
|
||||
if (model == null) {
|
||||
return "";
|
||||
}
|
||||
return model.getKey();
|
||||
}
|
||||
|
||||
public String getModelName() {
|
||||
return model != null ? model.getName() : "null";
|
||||
if (model == null) {
|
||||
return "";
|
||||
}
|
||||
return model.getName();
|
||||
}
|
||||
|
||||
private static class SchemaNameLengthComparator implements Comparator<SchemaElement> {
|
||||
|
||||
@Override
|
||||
public int compare(SchemaElement o1, SchemaElement o2) {
|
||||
if (o1.getOrder() != o2.getOrder()) {
|
||||
if (o1.getOrder() < o2.getOrder()) {
|
||||
return -1;
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
int len1 = o1.getName().length();
|
||||
int len2 = o2.getName().length();
|
||||
if (len1 != len2) {
|
||||
@@ -70,4 +86,27 @@ public class SemanticParseInfo {
|
||||
return metrics;
|
||||
}
|
||||
|
||||
private Map<Long, Integer> getModelElementCountMap() {
|
||||
Map<Long, Integer> elementCountMap = new HashMap<>();
|
||||
elementMatches.stream().filter(element -> element.getElement().getModel() != null)
|
||||
.forEach(element -> {
|
||||
int count = elementCountMap.getOrDefault(element.getElement().getModel(), 0);
|
||||
elementCountMap.put(element.getElement().getModel(), count + 1);
|
||||
});
|
||||
return elementCountMap;
|
||||
}
|
||||
|
||||
public Long getModelId() {
|
||||
Map<Long, Integer> elementCountMap = getModelElementCountMap();
|
||||
Long modelId = -1L;
|
||||
int maxCnt = 0;
|
||||
for (Long model : elementCountMap.keySet()) {
|
||||
if (elementCountMap.get(model) > maxCnt) {
|
||||
maxCnt = elementCountMap.get(model);
|
||||
modelId = model;
|
||||
}
|
||||
}
|
||||
return modelId;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
package com.tencent.supersonic.chat.api.pojo;
|
||||
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class SemanticSchema implements Serializable {
|
||||
@@ -18,6 +23,64 @@ public class SemanticSchema implements Serializable {
|
||||
modelSchemaList.add(schema);
|
||||
}
|
||||
|
||||
public SchemaElement getElement(SchemaElementType elementType, long elementID) {
|
||||
Optional<SchemaElement> element = Optional.empty();
|
||||
|
||||
switch (elementType) {
|
||||
case ENTITY:
|
||||
element = getElementsById(elementID, getEntities());
|
||||
break;
|
||||
case MODEL:
|
||||
element = getElementsById(elementID, getModels());
|
||||
break;
|
||||
case METRIC:
|
||||
element = getElementsById(elementID, getMetrics());
|
||||
break;
|
||||
case DIMENSION:
|
||||
element = getElementsById(elementID, getDimensions());
|
||||
break;
|
||||
case VALUE:
|
||||
element = getElementsById(elementID, getDimensionValues());
|
||||
break;
|
||||
default:
|
||||
}
|
||||
|
||||
if (element.isPresent()) {
|
||||
return element.get();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public SchemaElement getElementByName(SchemaElementType elementType, String name) {
|
||||
Optional<SchemaElement> element = Optional.empty();
|
||||
|
||||
switch (elementType) {
|
||||
case ENTITY:
|
||||
element = getElementsByNameOrAlias(name, getEntities());
|
||||
break;
|
||||
case MODEL:
|
||||
element = getElementsByNameOrAlias(name, getModels());
|
||||
break;
|
||||
case METRIC:
|
||||
element = getElementsByNameOrAlias(name, getMetrics());
|
||||
break;
|
||||
case DIMENSION:
|
||||
element = getElementsByNameOrAlias(name, getDimensions());
|
||||
break;
|
||||
case VALUE:
|
||||
element = getElementsByNameOrAlias(name, getDimensionValues());
|
||||
break;
|
||||
default:
|
||||
}
|
||||
|
||||
if (element.isPresent()) {
|
||||
return element.get();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public Map<Long, String> getModelIdToName() {
|
||||
return modelSchemaList.stream()
|
||||
.collect(Collectors.toMap(a -> a.getModel().getId(), a -> a.getModel().getName(), (k1, k2) -> k1));
|
||||
@@ -35,9 +98,28 @@ public class SemanticSchema implements Serializable {
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
public List<SchemaElement> getDimensions(Long modelId) {
|
||||
public List<SchemaElement> getDimensions(Set<Long> modelIds) {
|
||||
List<SchemaElement> dimensions = getDimensions();
|
||||
return getElementsByModelId(modelId, dimensions);
|
||||
return getElementsByModelId(modelIds, dimensions);
|
||||
}
|
||||
|
||||
public SchemaElement getDimensions(Long id) {
|
||||
List<SchemaElement> dimensions = getDimensions();
|
||||
Optional<SchemaElement> dimension = getElementsById(id, dimensions);
|
||||
return dimension.orElse(null);
|
||||
}
|
||||
|
||||
public List<SchemaElement> getTags() {
|
||||
List<SchemaElement> tags = new ArrayList<>();
|
||||
modelSchemaList.stream().forEach(d -> tags.addAll(d.getTags()));
|
||||
return tags;
|
||||
}
|
||||
|
||||
public List<SchemaElement> getTags(Set<Long> modelIds) {
|
||||
List<SchemaElement> tags = new ArrayList<>();
|
||||
modelSchemaList.stream().filter(schemaElement -> modelIds.contains(schemaElement.getModel()))
|
||||
.forEach(d -> tags.addAll(d.getTags()));
|
||||
return tags;
|
||||
}
|
||||
|
||||
public List<SchemaElement> getMetrics() {
|
||||
@@ -46,26 +128,55 @@ public class SemanticSchema implements Serializable {
|
||||
return metrics;
|
||||
}
|
||||
|
||||
public List<SchemaElement> getMetrics(Long modelId) {
|
||||
public List<SchemaElement> getMetrics(Set<Long> modelIds) {
|
||||
List<SchemaElement> metrics = getMetrics();
|
||||
return getElementsByModelId(modelId, metrics);
|
||||
return getElementsByModelId(modelIds, metrics);
|
||||
}
|
||||
|
||||
private List<SchemaElement> getElementsByModelId(Long modelId, List<SchemaElement> elements) {
|
||||
public List<SchemaElement> getEntities() {
|
||||
List<SchemaElement> entities = new ArrayList<>();
|
||||
modelSchemaList.stream().forEach(d -> entities.add(d.getEntity()));
|
||||
return entities;
|
||||
}
|
||||
|
||||
private List<SchemaElement> getElementsByModelId(Set<Long> modelIds, List<SchemaElement> elements) {
|
||||
return elements.stream()
|
||||
.filter(schemaElement -> modelId.equals(schemaElement.getModel()))
|
||||
.filter(schemaElement -> modelIds.contains(schemaElement.getModel()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private Optional<SchemaElement> getElementsById(Long id, List<SchemaElement> elements) {
|
||||
return elements.stream()
|
||||
.filter(schemaElement -> id.equals(schemaElement.getId()))
|
||||
.findFirst();
|
||||
}
|
||||
|
||||
private Optional<SchemaElement> getElementsByNameOrAlias(String name, List<SchemaElement> elements) {
|
||||
return elements.stream()
|
||||
.filter(schemaElement ->
|
||||
name.equals(schemaElement.getName()) || schemaElement.getAlias().contains(name)
|
||||
).findFirst();
|
||||
}
|
||||
|
||||
public List<SchemaElement> getModels() {
|
||||
List<SchemaElement> models = new ArrayList<>();
|
||||
modelSchemaList.stream().forEach(d -> models.add(d.getModel()));
|
||||
return models;
|
||||
}
|
||||
|
||||
public List<SchemaElement> getEntities() {
|
||||
List<SchemaElement> entities = new ArrayList<>();
|
||||
modelSchemaList.stream().forEach(d -> entities.add(d.getEntity()));
|
||||
return entities;
|
||||
public Map<String, String> getBizNameToName(Set<Long> modelIds) {
|
||||
List<SchemaElement> allElements = new ArrayList<>();
|
||||
allElements.addAll(getDimensions(modelIds));
|
||||
allElements.addAll(getMetrics(modelIds));
|
||||
return allElements.stream()
|
||||
.collect(Collectors.toMap(SchemaElement::getBizName, SchemaElement::getName, (k1, k2) -> k1));
|
||||
}
|
||||
|
||||
public Map<Long, ModelSchema> getModelSchemaMap() {
|
||||
if (CollectionUtils.isEmpty(modelSchemaList)) {
|
||||
return new HashMap<>();
|
||||
}
|
||||
return modelSchemaList.stream().collect(Collectors.toMap(modelSchema
|
||||
-> modelSchema.getModel().getModel(), modelSchema -> modelSchema));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,6 +32,11 @@ public class ChatConfigBaseReq {
|
||||
*/
|
||||
private List<RecommendedQuestionReq> recommendedQuestions;
|
||||
|
||||
/**
|
||||
* the llm examples about the model
|
||||
*/
|
||||
private String llmExamples;
|
||||
|
||||
/**
|
||||
* available status
|
||||
*/
|
||||
|
||||
@@ -1,16 +1,21 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.request;
|
||||
|
||||
import javax.validation.constraints.NotNull;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class DimensionValueReq {
|
||||
|
||||
private Integer agentId;
|
||||
|
||||
@NotNull
|
||||
private Long elementID;
|
||||
|
||||
@NotNull
|
||||
private Long modelId;
|
||||
|
||||
private String bizName;
|
||||
|
||||
private Object value;
|
||||
@NotNull
|
||||
private String value;
|
||||
}
|
||||
|
||||
@@ -3,16 +3,18 @@ package com.tencent.supersonic.chat.api.pojo.request;
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
@Builder
|
||||
@Data
|
||||
public class ExecuteQueryReq {
|
||||
private User user;
|
||||
private Integer agentId;
|
||||
private Integer chatId;
|
||||
private String queryText;
|
||||
private Long queryId = 7L;
|
||||
private Integer parseId = 2;
|
||||
private Long queryId;
|
||||
private Integer parseId;
|
||||
private SemanticParseInfo parseInfo;
|
||||
private boolean saveAnswer = true;
|
||||
private boolean saveAnswer;
|
||||
}
|
||||
|
||||
@@ -13,4 +13,8 @@ public class PageQueryInfoReq {
|
||||
private String userName;
|
||||
|
||||
private List<Long> ids;
|
||||
|
||||
public Integer getLimitStart() {
|
||||
return this.pageSize * (this.current - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,6 @@ public class QueryDataReq {
|
||||
private Set<QueryFilter> dimensionFilters = new HashSet<>();
|
||||
private Set<QueryFilter> metricFilters = new HashSet<>();
|
||||
private DateConf dateInfo;
|
||||
private Long queryId = 7L;
|
||||
private Integer parseId = 2;
|
||||
private Long queryId;
|
||||
private Integer parseId;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.request;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.FilterOperatorEnum;
|
||||
import lombok.Data;
|
||||
import lombok.ToString;
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import lombok.Data;
|
||||
public class QueryReq {
|
||||
private String queryText;
|
||||
private Integer chatId;
|
||||
private Long modelId = 0L;
|
||||
private Long modelId;
|
||||
private User user;
|
||||
private QueryFilters queryFilters;
|
||||
private boolean saveAnswer = true;
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.request;
|
||||
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class RecommendReq {
|
||||
|
||||
private Long modelId;
|
||||
|
||||
private Long metricId;
|
||||
|
||||
}
|
||||
@@ -10,7 +10,7 @@ import lombok.NoArgsConstructor;
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
@Builder
|
||||
public class SolvedQueryReq {
|
||||
public class SimilarQueryReq {
|
||||
|
||||
private Long queryId;
|
||||
|
||||
@@ -18,7 +18,7 @@ public class SolvedQueryReq {
|
||||
|
||||
private String queryText;
|
||||
|
||||
private Long modelId;
|
||||
private String modelId;
|
||||
|
||||
private Integer agentId;
|
||||
|
||||
@@ -23,6 +23,8 @@ public class ChatConfigResp {
|
||||
|
||||
private List<RecommendedQuestionReq> recommendedQuestions;
|
||||
|
||||
private String llmExamples;
|
||||
|
||||
/**
|
||||
* available status
|
||||
*/
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.response;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.List;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class ModelInfo extends DataInfo implements Serializable {
|
||||
|
||||
private List<String> words;
|
||||
private String primaryEntityName;
|
||||
private String primaryEntityBizName;
|
||||
private String primaryKey;
|
||||
}
|
||||
|
||||
@@ -1,31 +1,23 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.response;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import lombok.Data;
|
||||
import lombok.Getter;
|
||||
import lombok.Builder;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.AllArgsConstructor;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@Getter
|
||||
@Builder
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ParseResp {
|
||||
private Integer chatId;
|
||||
private String queryText;
|
||||
private Long queryId;
|
||||
private ParseState state;
|
||||
private List<SemanticParseInfo> selectedParses;
|
||||
private List<SemanticParseInfo> candidateParses;
|
||||
private List<SolvedQueryRecallResp> similarSolvedQuery;
|
||||
private List<SemanticParseInfo> selectedParses = Lists.newArrayList();
|
||||
private ParseTimeCostDO parseTimeCost = new ParseTimeCostDO();
|
||||
|
||||
public enum ParseState {
|
||||
COMPLETED,
|
||||
PENDING,
|
||||
FAILED
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.response;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class ParseTimeCostDO {
|
||||
|
||||
private long parseStartTime;
|
||||
private long parseTime;
|
||||
private long sqlTime;
|
||||
|
||||
public ParseTimeCostDO() {
|
||||
this.parseStartTime = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.response;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class QueryRecallResp {
|
||||
private List<SimilarQueryRecallResp> solvedQueryRecallRespList;
|
||||
private Long queryTimeCost;
|
||||
}
|
||||
@@ -1,7 +1,9 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.response;
|
||||
|
||||
import java.util.Date;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import lombok.Data;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class QueryResp {
|
||||
@@ -13,4 +15,6 @@ public class QueryResp {
|
||||
private String feedback;
|
||||
private String queryText;
|
||||
private QueryResult queryResult;
|
||||
private List<SemanticParseInfo> parseInfos;
|
||||
private List<SimilarQueryRecallResp> similarQueries;
|
||||
}
|
||||
@@ -1,11 +1,12 @@
|
||||
package com.tencent.supersonic.chat.api.pojo.response;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.common.pojo.QueryAuthorization;
|
||||
import com.tencent.supersonic.common.pojo.QueryColumn;
|
||||
import lombok.Data;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import lombok.Data;
|
||||
|
||||
@Data
|
||||
public class QueryResult {
|
||||
@@ -21,4 +22,6 @@ public class QueryResult {
|
||||
private SemanticParseInfo chatContext;
|
||||
private Object response;
|
||||
private List<Map<String, Object>> queryResults;
|
||||
private Long queryTimeCost;
|
||||
private List<SchemaElement> recommendedDimensions;
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import lombok.Data;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
public class SolvedQueryRecallResp {
|
||||
public class SimilarQueryRecallResp {
|
||||
|
||||
private Long queryId;
|
||||
|
||||
@@ -5,7 +5,7 @@ import lombok.Data;
|
||||
@Data
|
||||
public class SqlInfo {
|
||||
|
||||
private String llmParseSql;
|
||||
private String logicSql;
|
||||
private String querySql;
|
||||
private String s2SQL;
|
||||
private String correctS2SQL;
|
||||
private String querySQL;
|
||||
}
|
||||
|
||||
@@ -40,11 +40,6 @@
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.junit.jupiter</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
@@ -59,16 +54,7 @@
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mybatis.spring.boot</groupId>
|
||||
<artifactId>mybatis-spring-boot-starter</artifactId>
|
||||
<version>${mybatis-spring.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.alibaba</groupId>
|
||||
<artifactId>druid</artifactId>
|
||||
@@ -78,24 +64,6 @@
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mybatis</groupId>
|
||||
<artifactId>mybatis-spring</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mybatis.spring.boot</groupId>
|
||||
<artifactId>mybatis-spring-boot-starter-test</artifactId>
|
||||
<version>${mybatis.test.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.pagehelper</groupId>
|
||||
<artifactId>pagehelper-spring-boot-starter</artifactId>
|
||||
<version>${pagehelper.spring.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.h2database</groupId>
|
||||
@@ -116,7 +84,6 @@
|
||||
<groupId>com.tencent.supersonic</groupId>
|
||||
<artifactId>semantic-query</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.tencent.supersonic</groupId>
|
||||
@@ -124,12 +91,6 @@
|
||||
<version>${project.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.tencent.supersonic</groupId>
|
||||
<artifactId>semantic-query</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.github.xkzhangsan</groupId>
|
||||
|
||||
@@ -3,7 +3,6 @@ package com.tencent.supersonic.chat.agent;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.chat.agent.tool.AgentToolType;
|
||||
import com.tencent.supersonic.common.pojo.RecordInfo;
|
||||
import java.util.Objects;
|
||||
import lombok.Data;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.chat.agent.tool.AgentTool;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.chat.agent.tool;
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
@@ -0,0 +1,8 @@
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
public enum AgentToolType {
|
||||
NL2SQL_RULE,
|
||||
NL2SQL_LLM,
|
||||
PLUGIN,
|
||||
ANALYTICS
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
|
||||
@Data
|
||||
public class DataAnalyticsTool extends AgentTool {
|
||||
|
||||
private Long modelId;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class LLMParserTool extends NL2SQLTool {
|
||||
|
||||
private List<String> exampleQuestions;
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class NL2SQLTool extends AgentTool {
|
||||
|
||||
protected List<Long> modelIds;
|
||||
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.chat.agent.tool;
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
|
||||
import lombok.Data;
|
||||
@@ -1,4 +1,4 @@
|
||||
package com.tencent.supersonic.chat.agent.tool;
|
||||
package com.tencent.supersonic.chat.agent;
|
||||
|
||||
|
||||
import lombok.Data;
|
||||
@@ -7,12 +7,13 @@ import org.apache.commons.collections.CollectionUtils;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class RuleQueryTool extends AgentTool {
|
||||
public class RuleParserTool extends NL2SQLTool {
|
||||
|
||||
private List<Long> modelIds;
|
||||
|
||||
private List<String> queryModes;
|
||||
|
||||
private List<String> queryTypes;
|
||||
|
||||
public boolean isContainsAllModel() {
|
||||
return CollectionUtils.isNotEmpty(modelIds) && modelIds.contains(-1L);
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
package com.tencent.supersonic.chat.agent.tool;
|
||||
|
||||
public enum AgentToolType {
|
||||
RULE,
|
||||
DSL,
|
||||
PLUGIN,
|
||||
INTERPRET
|
||||
}
|
||||
@@ -1,14 +0,0 @@
|
||||
package com.tencent.supersonic.chat.agent.tool;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
public class DslTool extends AgentTool {
|
||||
|
||||
private List<Long> modelIds;
|
||||
|
||||
private List<String> exampleQuestions;
|
||||
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
package com.tencent.supersonic.chat.agent.tool;
|
||||
|
||||
import com.tencent.supersonic.chat.parser.llm.interpret.MetricOption;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@Data
|
||||
public class MetricInterpretTool extends AgentTool {
|
||||
|
||||
private Long modelId;
|
||||
|
||||
private List<MetricOption> metricOptions;
|
||||
|
||||
}
|
||||
@@ -1,43 +1,171 @@
|
||||
package com.tencent.supersonic.chat.config;
|
||||
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.SqlGenerationMode;
|
||||
import com.tencent.supersonic.common.service.SysParameterService;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.PropertySource;
|
||||
|
||||
@Configuration
|
||||
@Data
|
||||
@PropertySource("classpath:optimization.properties")
|
||||
//@ComponentScan(basePackages = "com.tencent.supersonic.chat")
|
||||
@Slf4j
|
||||
public class OptimizationConfig {
|
||||
|
||||
@Value("${one.detection.size}")
|
||||
@Value("${one.detection.size:8}")
|
||||
private Integer oneDetectionSize;
|
||||
@Value("${one.detection.max.size}")
|
||||
|
||||
@Value("${one.detection.max.size:20}")
|
||||
private Integer oneDetectionMaxSize;
|
||||
|
||||
@Value("${metric.dimension.min.threshold}")
|
||||
@Value("${metric.dimension.min.threshold:0.3}")
|
||||
private Double metricDimensionMinThresholdConfig;
|
||||
|
||||
@Value("${metric.dimension.threshold}")
|
||||
@Value("${metric.dimension.threshold:0.3}")
|
||||
private Double metricDimensionThresholdConfig;
|
||||
|
||||
@Value("${dimension.value.threshold}")
|
||||
@Value("${dimension.value.threshold:0.5}")
|
||||
private Double dimensionValueThresholdConfig;
|
||||
|
||||
@Value("${function.bonus.threshold}")
|
||||
private Double functionBonusThreshold;
|
||||
|
||||
@Value("${long.text.threshold}")
|
||||
@Value("${long.text.threshold:0.8}")
|
||||
private Double longTextThreshold;
|
||||
|
||||
@Value("${short.text.threshold}")
|
||||
@Value("${short.text.threshold:0.5}")
|
||||
private Double shortTextThreshold;
|
||||
|
||||
@Value("${query.text.length.threshold}")
|
||||
@Value("${query.text.length.threshold:10}")
|
||||
private Integer queryTextLengthThreshold;
|
||||
@Value("${embedding.mapper.word.min:4}")
|
||||
private int embeddingMapperWordMin;
|
||||
|
||||
@Value("${candidate.threshold}")
|
||||
private Double candidateThreshold;
|
||||
@Value("${embedding.mapper.word.max:5}")
|
||||
private int embeddingMapperWordMax;
|
||||
|
||||
@Value("${embedding.mapper.batch:50}")
|
||||
private int embeddingMapperBatch;
|
||||
|
||||
@Value("${embedding.mapper.number:5}")
|
||||
private int embeddingMapperNumber;
|
||||
|
||||
@Value("${embedding.mapper.round.number:10}")
|
||||
private int embeddingMapperRoundNumber;
|
||||
|
||||
@Value("${embedding.mapper.distance.threshold:0.58}")
|
||||
private Double embeddingMapperDistanceThreshold;
|
||||
|
||||
@Value("${s2SQL.linking.value.switch:true}")
|
||||
private boolean useLinkingValueSwitch;
|
||||
|
||||
@Value("${s2SQL.generation:TWO_PASS_AUTO_COT}")
|
||||
private SqlGenerationMode sqlGenerationMode;
|
||||
|
||||
@Value("${s2SQL.use.switch:true}")
|
||||
private boolean useS2SqlSwitch;
|
||||
|
||||
@Value("${text2sql.example.num:10}")
|
||||
private int text2sqlExampleNum;
|
||||
|
||||
@Value("${text2sql.fewShots.num:5}")
|
||||
private int text2sqlFewShotsNum;
|
||||
|
||||
@Value("${text2sql.self.consistency.num:2}")
|
||||
private int text2sqlSelfConsistencyNum;
|
||||
|
||||
@Value("${text2sql.collection.name:text2dsl_agent_collection}")
|
||||
private String text2sqlCollectionName;
|
||||
|
||||
@Autowired
|
||||
private SysParameterService sysParameterService;
|
||||
|
||||
public Integer getOneDetectionSize() {
|
||||
return convertValue("one.detection.size", Integer.class, oneDetectionSize);
|
||||
}
|
||||
|
||||
public Integer getOneDetectionMaxSize() {
|
||||
return convertValue("one.detection.max.size", Integer.class, oneDetectionMaxSize);
|
||||
}
|
||||
|
||||
public Double getMetricDimensionMinThresholdConfig() {
|
||||
return convertValue("metric.dimension.min.threshold", Double.class, metricDimensionMinThresholdConfig);
|
||||
}
|
||||
|
||||
public Double getMetricDimensionThresholdConfig() {
|
||||
return convertValue("metric.dimension.threshold", Double.class, metricDimensionThresholdConfig);
|
||||
}
|
||||
|
||||
public Double getDimensionValueThresholdConfig() {
|
||||
return convertValue("dimension.value.threshold", Double.class, dimensionValueThresholdConfig);
|
||||
}
|
||||
|
||||
public Double getLongTextThreshold() {
|
||||
return convertValue("long.text.threshold", Double.class, longTextThreshold);
|
||||
}
|
||||
|
||||
public Double getShortTextThreshold() {
|
||||
return convertValue("short.text.threshold", Double.class, shortTextThreshold);
|
||||
}
|
||||
|
||||
public Integer getQueryTextLengthThreshold() {
|
||||
return convertValue("query.text.length.threshold", Integer.class, queryTextLengthThreshold);
|
||||
}
|
||||
|
||||
public boolean isUseS2SqlSwitch() {
|
||||
return convertValue("use.s2SQL.switch", Boolean.class, useS2SqlSwitch);
|
||||
}
|
||||
|
||||
public Integer getEmbeddingMapperWordMin() {
|
||||
return convertValue("embedding.mapper.word.min", Integer.class, embeddingMapperWordMin);
|
||||
}
|
||||
|
||||
public Integer getEmbeddingMapperWordMax() {
|
||||
return convertValue("embedding.mapper.word.max", Integer.class, embeddingMapperWordMax);
|
||||
}
|
||||
|
||||
public Integer getEmbeddingMapperBatch() {
|
||||
return convertValue("embedding.mapper.batch", Integer.class, embeddingMapperBatch);
|
||||
}
|
||||
|
||||
public Integer getEmbeddingMapperNumber() {
|
||||
return convertValue("embedding.mapper.number", Integer.class, embeddingMapperNumber);
|
||||
}
|
||||
|
||||
public Integer getEmbeddingMapperRoundNumber() {
|
||||
return convertValue("embedding.mapper.round.number", Integer.class, embeddingMapperRoundNumber);
|
||||
}
|
||||
|
||||
public Double getEmbeddingMapperDistanceThreshold() {
|
||||
return convertValue("embedding.mapper.distance.threshold", Double.class, embeddingMapperDistanceThreshold);
|
||||
}
|
||||
|
||||
public boolean isUseLinkingValueSwitch() {
|
||||
return convertValue("s2SQL.linking.value.switch", Boolean.class, useLinkingValueSwitch);
|
||||
}
|
||||
|
||||
public SqlGenerationMode getSqlGenerationMode() {
|
||||
return convertValue("s2SQL.generation", SqlGenerationMode.class, sqlGenerationMode);
|
||||
}
|
||||
|
||||
public <T> T convertValue(String paramName, Class<T> targetType, T defaultValue) {
|
||||
try {
|
||||
String value = sysParameterService.getSysParameter().getParameterByName(paramName);
|
||||
if (StringUtils.isBlank(value)) {
|
||||
return defaultValue;
|
||||
}
|
||||
if (targetType == Double.class) {
|
||||
return targetType.cast(Double.parseDouble(value));
|
||||
} else if (targetType == Integer.class) {
|
||||
return targetType.cast(Integer.parseInt(value));
|
||||
} else if (targetType == Boolean.class) {
|
||||
return targetType.cast(Boolean.parseBoolean(value));
|
||||
} else if (targetType == SqlGenerationMode.class) {
|
||||
return targetType.cast(SqlGenerationMode.valueOf(value));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("convertValue", e);
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -2,14 +2,21 @@ package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.component.SemanticCorrector;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.common.pojo.enums.AggregateTypeEnum;
|
||||
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.DateUtils;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserAddHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectFunctionHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.commons.lang3.tuple.Pair;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
@@ -17,17 +24,29 @@ import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
|
||||
* basic semantic correction functionality, offering common methods and an
|
||||
* abstract method called doCorrect
|
||||
*/
|
||||
@Slf4j
|
||||
public abstract class BaseSemanticCorrector implements SemanticCorrector {
|
||||
|
||||
public void correct(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
semanticCorrectInfo.setPreSql(semanticCorrectInfo.getSql());
|
||||
public void correct(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
try {
|
||||
if (StringUtils.isBlank(semanticParseInfo.getSqlInfo().getCorrectS2SQL())) {
|
||||
return;
|
||||
}
|
||||
doCorrect(queryReq, semanticParseInfo);
|
||||
log.info("sqlCorrection:{} sql:{}", this.getClass().getSimpleName(), semanticParseInfo.getSqlInfo());
|
||||
} catch (Exception e) {
|
||||
log.error(String.format("correct error,sqlInfo:%s", semanticParseInfo.getSqlInfo()), e);
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<String, String> getFieldNameMap(Long modelId) {
|
||||
public abstract void doCorrect(QueryReq queryReq, SemanticParseInfo semanticParseInfo);
|
||||
|
||||
protected Map<String, String> getFieldNameMap(Set<Long> modelIds) {
|
||||
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
|
||||
@@ -35,34 +54,59 @@ public abstract class BaseSemanticCorrector implements SemanticCorrector {
|
||||
dbAllFields.addAll(semanticSchema.getMetrics());
|
||||
dbAllFields.addAll(semanticSchema.getDimensions());
|
||||
|
||||
// support fieldName and field alias
|
||||
Map<String, String> result = dbAllFields.stream()
|
||||
.filter(entry -> entry.getModel().equals(modelId))
|
||||
.collect(Collectors.toMap(SchemaElement::getName, a -> a.getName(), (k1, k2) -> k1));
|
||||
result.put(DateUtils.DATE_FIELD, DateUtils.DATE_FIELD);
|
||||
.filter(entry -> modelIds.contains(entry.getModel()))
|
||||
.flatMap(schemaElement -> {
|
||||
Set<String> elements = new HashSet<>();
|
||||
elements.add(schemaElement.getName());
|
||||
if (!CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
elements.addAll(schemaElement.getAlias());
|
||||
}
|
||||
return elements.stream();
|
||||
})
|
||||
.collect(Collectors.toMap(a -> a, a -> a, (k1, k2) -> k1));
|
||||
result.put(TimeDimensionEnum.DAY.getChName(), TimeDimensionEnum.DAY.getChName());
|
||||
result.put(TimeDimensionEnum.MONTH.getChName(), TimeDimensionEnum.MONTH.getChName());
|
||||
result.put(TimeDimensionEnum.WEEK.getChName(), TimeDimensionEnum.WEEK.getChName());
|
||||
|
||||
result.put(TimeDimensionEnum.DAY.getName(), TimeDimensionEnum.DAY.getChName());
|
||||
result.put(TimeDimensionEnum.MONTH.getName(), TimeDimensionEnum.MONTH.getChName());
|
||||
result.put(TimeDimensionEnum.WEEK.getName(), TimeDimensionEnum.WEEK.getChName());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
protected void addFieldsToSelect(SemanticCorrectInfo semanticCorrectInfo, String sql) {
|
||||
Set<String> selectFields = new HashSet<>(SqlParserSelectHelper.getSelectFields(sql));
|
||||
Set<String> needAddFields = new HashSet<>(SqlParserSelectHelper.getGroupByFields(sql));
|
||||
needAddFields.addAll(SqlParserSelectHelper.getOrderByFields(sql));
|
||||
protected void addFieldsToSelect(SemanticParseInfo semanticParseInfo, String correctS2SQL) {
|
||||
Set<String> selectFields = new HashSet<>(SqlParserSelectHelper.getSelectFields(correctS2SQL));
|
||||
Set<String> needAddFields = new HashSet<>(SqlParserSelectHelper.getGroupByFields(correctS2SQL));
|
||||
needAddFields.addAll(SqlParserSelectHelper.getOrderByFields(correctS2SQL));
|
||||
|
||||
// If there is no aggregate function in the S2SQL statement and
|
||||
// there is a data field in 'WHERE' statement, add the field to the 'SELECT' statement.
|
||||
if (!SqlParserSelectFunctionHelper.hasAggregateFunction(correctS2SQL)) {
|
||||
List<String> whereFields = SqlParserSelectHelper.getWhereFields(correctS2SQL);
|
||||
List<String> timeChNameList = TimeDimensionEnum.getChNameList();
|
||||
Set<String> timeFields = whereFields.stream().filter(field -> timeChNameList.contains(field))
|
||||
.collect(Collectors.toSet());
|
||||
needAddFields.addAll(timeFields);
|
||||
}
|
||||
|
||||
if (CollectionUtils.isEmpty(selectFields) || CollectionUtils.isEmpty(needAddFields)) {
|
||||
return;
|
||||
}
|
||||
|
||||
needAddFields.removeAll(selectFields);
|
||||
needAddFields.remove(DateUtils.DATE_FIELD);
|
||||
String replaceFields = SqlParserAddHelper.addFieldsToSelect(sql, new ArrayList<>(needAddFields));
|
||||
semanticCorrectInfo.setSql(replaceFields);
|
||||
String replaceFields = SqlParserAddHelper.addFieldsToSelect(correctS2SQL, new ArrayList<>(needAddFields));
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(replaceFields);
|
||||
}
|
||||
|
||||
protected void addAggregateToMetric(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
protected void addAggregateToMetric(SemanticParseInfo semanticParseInfo) {
|
||||
//add aggregate to all metric
|
||||
String sql = semanticCorrectInfo.getSql();
|
||||
Long modelId = semanticCorrectInfo.getParseInfo().getModel().getModel();
|
||||
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectS2SQL();
|
||||
Set<Long> modelIds = semanticParseInfo.getModel().getModelIds();
|
||||
|
||||
List<SchemaElement> metrics = getMetricElements(modelId);
|
||||
List<SchemaElement> metrics = getMetricElements(modelIds);
|
||||
|
||||
Map<String, String> metricToAggregate = metrics.stream()
|
||||
.map(schemaElement -> {
|
||||
@@ -70,23 +114,26 @@ public abstract class BaseSemanticCorrector implements SemanticCorrector {
|
||||
schemaElement.setDefaultAgg(AggregateTypeEnum.SUM.name());
|
||||
}
|
||||
return schemaElement;
|
||||
}).collect(Collectors.toMap(a -> a.getName(), a -> a.getDefaultAgg(), (k1, k2) -> k1));
|
||||
}).flatMap(schemaElement -> {
|
||||
Set<String> elements = new HashSet<>();
|
||||
elements.add(schemaElement.getName());
|
||||
if (!CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
elements.addAll(schemaElement.getAlias());
|
||||
}
|
||||
return elements.stream().map(element -> Pair.of(element, schemaElement.getDefaultAgg())
|
||||
);
|
||||
}).collect(Collectors.toMap(Pair::getLeft, Pair::getRight, (k1, k2) -> k1));
|
||||
|
||||
if (CollectionUtils.isEmpty(metricToAggregate)) {
|
||||
return;
|
||||
}
|
||||
|
||||
String aggregateSql = SqlParserAddHelper.addAggregateToField(sql, metricToAggregate);
|
||||
semanticCorrectInfo.setSql(aggregateSql);
|
||||
String aggregateSql = SqlParserAddHelper.addAggregateToField(correctS2SQL, metricToAggregate);
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(aggregateSql);
|
||||
}
|
||||
|
||||
protected List<SchemaElement> getMetricElements(Long modelId) {
|
||||
protected List<SchemaElement> getMetricElements(Set<Long> modelIds) {
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
return semanticSchema.getMetrics(modelId);
|
||||
return semanticSchema.getMetrics(modelIds);
|
||||
}
|
||||
|
||||
protected List<SchemaElement> getDimensionElements(Long modelId) {
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
return semanticSchema.getDimensions(modelId);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserReplaceHelper;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
/**
|
||||
* Perform SQL corrections on the "From" section in S2SQL.
|
||||
*/
|
||||
@Slf4j
|
||||
public class FromCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void doCorrect(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
String modelName = semanticParseInfo.getModel().getName();
|
||||
SqlParserReplaceHelper.replaceTable(semanticParseInfo.getSqlInfo().getCorrectS2SQL(), modelName);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserAddHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectFunctionHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import java.util.Objects;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sf.jsqlparser.expression.Expression;
|
||||
|
||||
@Slf4j
|
||||
public class GlobalAfterCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void correct(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
|
||||
super.correct(semanticCorrectInfo);
|
||||
String sql = semanticCorrectInfo.getSql();
|
||||
if (!SqlParserSelectFunctionHelper.hasAggregateFunction(sql)) {
|
||||
return;
|
||||
}
|
||||
Expression havingExpression = SqlParserSelectHelper.getHavingExpression(sql);
|
||||
if (Objects.nonNull(havingExpression)) {
|
||||
String replaceSql = SqlParserAddHelper.addFunctionToSelect(sql, havingExpression);
|
||||
semanticCorrectInfo.setSql(replaceSql);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,96 +0,0 @@
|
||||
package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.chat.parser.llm.dsl.DSLParseResult;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.LLMReq.ElementValue;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserReplaceHelper;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Slf4j
|
||||
public class GlobalBeforeCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void correct(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
|
||||
super.correct(semanticCorrectInfo);
|
||||
|
||||
replaceAlias(semanticCorrectInfo);
|
||||
|
||||
updateFieldNameByLinkingValue(semanticCorrectInfo);
|
||||
|
||||
updateFieldValueByLinkingValue(semanticCorrectInfo);
|
||||
|
||||
correctFieldName(semanticCorrectInfo);
|
||||
}
|
||||
|
||||
private void replaceAlias(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
String replaceAlias = SqlParserReplaceHelper.replaceAlias(semanticCorrectInfo.getSql());
|
||||
semanticCorrectInfo.setSql(replaceAlias);
|
||||
}
|
||||
|
||||
private void correctFieldName(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
|
||||
Map<String, String> fieldNameMap = getFieldNameMap(semanticCorrectInfo.getParseInfo().getModelId());
|
||||
|
||||
String sql = SqlParserReplaceHelper.replaceFields(semanticCorrectInfo.getSql(), fieldNameMap);
|
||||
|
||||
semanticCorrectInfo.setSql(sql);
|
||||
}
|
||||
|
||||
private void updateFieldNameByLinkingValue(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
List<ElementValue> linking = getLinkingValues(semanticCorrectInfo);
|
||||
if (CollectionUtils.isEmpty(linking)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, Set<String>> fieldValueToFieldNames = linking.stream().collect(
|
||||
Collectors.groupingBy(ElementValue::getFieldValue,
|
||||
Collectors.mapping(ElementValue::getFieldName, Collectors.toSet())));
|
||||
|
||||
String sql = SqlParserReplaceHelper.replaceFieldNameByValue(semanticCorrectInfo.getSql(),
|
||||
fieldValueToFieldNames);
|
||||
semanticCorrectInfo.setSql(sql);
|
||||
}
|
||||
|
||||
private List<ElementValue> getLinkingValues(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
Object context = semanticCorrectInfo.getParseInfo().getProperties().get(Constants.CONTEXT);
|
||||
if (Objects.isNull(context)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
DSLParseResult dslParseResult = JsonUtil.toObject(JsonUtil.toString(context), DSLParseResult.class);
|
||||
if (Objects.isNull(dslParseResult) || Objects.isNull(dslParseResult.getLlmReq())) {
|
||||
return null;
|
||||
}
|
||||
LLMReq llmReq = dslParseResult.getLlmReq();
|
||||
return llmReq.getLinking();
|
||||
}
|
||||
|
||||
|
||||
private void updateFieldValueByLinkingValue(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
List<ElementValue> linking = getLinkingValues(semanticCorrectInfo);
|
||||
if (CollectionUtils.isEmpty(linking)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, Map<String, String>> filedNameToValueMap = linking.stream().collect(
|
||||
Collectors.groupingBy(ElementValue::getFieldName,
|
||||
Collectors.mapping(ElementValue::getFieldValue, Collectors.toMap(
|
||||
oldValue -> oldValue,
|
||||
newValue -> newValue,
|
||||
(existingValue, newValue) -> newValue)
|
||||
)));
|
||||
|
||||
String sql = SqlParserReplaceHelper.replaceValue(semanticCorrectInfo.getSql(), filedNameToValueMap, false);
|
||||
semanticCorrectInfo.setSql(sql);
|
||||
}
|
||||
}
|
||||
@@ -1,46 +1,71 @@
|
||||
package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.api.pojo.response.SqlInfo;
|
||||
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.DateUtils;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserAddHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Perform SQL corrections on the "Group by" section in S2SQL.
|
||||
*/
|
||||
@Slf4j
|
||||
public class GroupByCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void correct(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
public void doCorrect(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
|
||||
super.correct(semanticCorrectInfo);
|
||||
addGroupByFields(semanticParseInfo);
|
||||
|
||||
addGroupByFields(semanticCorrectInfo);
|
||||
|
||||
addAggregate(semanticCorrectInfo);
|
||||
}
|
||||
|
||||
private void addGroupByFields(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
Long modelId = semanticCorrectInfo.getParseInfo().getModel().getModel();
|
||||
private void addGroupByFields(SemanticParseInfo semanticParseInfo) {
|
||||
Set<Long> modelIds = semanticParseInfo.getModel().getModelIds();
|
||||
|
||||
//add dimension group by
|
||||
String sql = semanticCorrectInfo.getSql();
|
||||
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
|
||||
String correctS2SQL = sqlInfo.getCorrectS2SQL();
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
Set<String> dimensions = semanticSchema.getDimensions(modelId).stream()
|
||||
.map(schemaElement -> schemaElement.getName()).collect(Collectors.toSet());
|
||||
dimensions.add(DateUtils.DATE_FIELD);
|
||||
List<String> selectFields = SqlParserSelectHelper.getSelectFields(sql);
|
||||
//add alias field name
|
||||
Set<String> dimensions = semanticSchema.getDimensions(modelIds).stream()
|
||||
.flatMap(
|
||||
schemaElement -> {
|
||||
Set<String> elements = new HashSet<>();
|
||||
elements.add(schemaElement.getName());
|
||||
if (!CollectionUtils.isEmpty(schemaElement.getAlias())) {
|
||||
elements.addAll(schemaElement.getAlias());
|
||||
}
|
||||
return elements.stream();
|
||||
}
|
||||
).collect(Collectors.toSet());
|
||||
dimensions.add(TimeDimensionEnum.DAY.getChName());
|
||||
|
||||
List<String> selectFields = SqlParserSelectHelper.getSelectFields(correctS2SQL);
|
||||
|
||||
if (CollectionUtils.isEmpty(selectFields) || CollectionUtils.isEmpty(dimensions)) {
|
||||
return;
|
||||
}
|
||||
List<String> aggregateFields = SqlParserSelectHelper.getAggregateFields(sql);
|
||||
// if only date in select not add group by.
|
||||
if (selectFields.size() == 1 && selectFields.contains(TimeDimensionEnum.DAY.getChName())) {
|
||||
return;
|
||||
}
|
||||
if (SqlParserSelectHelper.hasGroupBy(correctS2SQL)) {
|
||||
log.info("not add group by ,exist group by in correctS2SQL:{}", correctS2SQL);
|
||||
return;
|
||||
}
|
||||
|
||||
List<String> aggregateFields = SqlParserSelectHelper.getAggregateFields(correctS2SQL);
|
||||
Set<String> groupByFields = selectFields.stream()
|
||||
.filter(field -> dimensions.contains(field))
|
||||
.filter(field -> {
|
||||
@@ -50,14 +75,17 @@ public class GroupByCorrector extends BaseSemanticCorrector {
|
||||
return true;
|
||||
})
|
||||
.collect(Collectors.toSet());
|
||||
semanticCorrectInfo.setSql(SqlParserAddHelper.addGroupBy(sql, groupByFields));
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(SqlParserAddHelper.addGroupBy(correctS2SQL, groupByFields));
|
||||
|
||||
addAggregate(semanticParseInfo);
|
||||
}
|
||||
|
||||
private void addAggregate(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
List<String> sqlGroupByFields = SqlParserSelectHelper.getGroupByFields(semanticCorrectInfo.getSql());
|
||||
private void addAggregate(SemanticParseInfo semanticParseInfo) {
|
||||
List<String> sqlGroupByFields = SqlParserSelectHelper.getGroupByFields(
|
||||
semanticParseInfo.getSqlInfo().getCorrectS2SQL());
|
||||
if (CollectionUtils.isEmpty(sqlGroupByFields)) {
|
||||
return;
|
||||
}
|
||||
addAggregateToMetric(semanticCorrectInfo);
|
||||
addAggregateToMetric(semanticParseInfo);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,37 +1,65 @@
|
||||
package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserAddHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectFunctionHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sf.jsqlparser.expression.Expression;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
|
||||
* Perform SQL corrections on the "Having" section in S2SQL.
|
||||
*/
|
||||
@Slf4j
|
||||
public class HavingCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void correct(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
|
||||
super.correct(semanticCorrectInfo);
|
||||
public void doCorrect(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
|
||||
//add aggregate to all metric
|
||||
semanticCorrectInfo.setPreSql(semanticCorrectInfo.getSql());
|
||||
Long modelId = semanticCorrectInfo.getParseInfo().getModel().getModel();
|
||||
addHaving(semanticParseInfo);
|
||||
|
||||
//add having expression filed to select
|
||||
addHavingToSelect(semanticParseInfo);
|
||||
|
||||
}
|
||||
|
||||
private void addHaving(SemanticParseInfo semanticParseInfo) {
|
||||
Set<Long> modelIds = semanticParseInfo.getModel().getModelIds();
|
||||
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
|
||||
Set<String> metrics = semanticSchema.getMetrics(modelId).stream()
|
||||
Set<String> metrics = semanticSchema.getMetrics(modelIds).stream()
|
||||
.map(schemaElement -> schemaElement.getName()).collect(Collectors.toSet());
|
||||
|
||||
if (CollectionUtils.isEmpty(metrics)) {
|
||||
return;
|
||||
}
|
||||
String havingSql = SqlParserAddHelper.addHaving(semanticCorrectInfo.getSql(), metrics);
|
||||
semanticCorrectInfo.setSql(havingSql);
|
||||
String havingSql = SqlParserAddHelper.addHaving(semanticParseInfo.getSqlInfo().getCorrectS2SQL(), metrics);
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(havingSql);
|
||||
}
|
||||
|
||||
private void addHavingToSelect(SemanticParseInfo semanticParseInfo) {
|
||||
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectS2SQL();
|
||||
if (!SqlParserSelectFunctionHelper.hasAggregateFunction(correctS2SQL)) {
|
||||
return;
|
||||
}
|
||||
List<Expression> havingExpressionList = SqlParserSelectHelper.getHavingExpression(correctS2SQL);
|
||||
if (!CollectionUtils.isEmpty(havingExpressionList)) {
|
||||
String replaceSql = SqlParserAddHelper.addFunctionToSelect(correctS2SQL, havingExpressionList);
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(replaceSql);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.api.pojo.response.SqlInfo;
|
||||
import com.tencent.supersonic.chat.parser.sql.llm.ParseResult;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.ElementValue;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.AggregateEnum;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserReplaceHelper;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
|
||||
* Perform schema corrections on the Schema information in S2QL.
|
||||
*/
|
||||
@Slf4j
|
||||
public class SchemaCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void doCorrect(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
|
||||
correctAggFunction(semanticParseInfo);
|
||||
|
||||
replaceAlias(semanticParseInfo);
|
||||
|
||||
updateFieldNameByLinkingValue(semanticParseInfo);
|
||||
|
||||
updateFieldValueByLinkingValue(semanticParseInfo);
|
||||
|
||||
correctFieldName(semanticParseInfo);
|
||||
}
|
||||
|
||||
private void correctAggFunction(SemanticParseInfo semanticParseInfo) {
|
||||
Map<String, String> aggregateEnum = AggregateEnum.getAggregateEnum();
|
||||
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
|
||||
String sql = SqlParserReplaceHelper.replaceFunction(sqlInfo.getCorrectS2SQL(), aggregateEnum);
|
||||
sqlInfo.setCorrectS2SQL(sql);
|
||||
}
|
||||
|
||||
private void replaceAlias(SemanticParseInfo semanticParseInfo) {
|
||||
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
|
||||
String replaceAlias = SqlParserReplaceHelper.replaceAlias(sqlInfo.getCorrectS2SQL());
|
||||
sqlInfo.setCorrectS2SQL(replaceAlias);
|
||||
}
|
||||
|
||||
private void correctFieldName(SemanticParseInfo semanticParseInfo) {
|
||||
Map<String, String> fieldNameMap = getFieldNameMap(semanticParseInfo.getModel().getModelIds());
|
||||
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
|
||||
String sql = SqlParserReplaceHelper.replaceFields(sqlInfo.getCorrectS2SQL(), fieldNameMap);
|
||||
sqlInfo.setCorrectS2SQL(sql);
|
||||
}
|
||||
|
||||
private void updateFieldNameByLinkingValue(SemanticParseInfo semanticParseInfo) {
|
||||
List<ElementValue> linking = getLinkingValues(semanticParseInfo);
|
||||
if (CollectionUtils.isEmpty(linking)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, Set<String>> fieldValueToFieldNames = linking.stream().collect(
|
||||
Collectors.groupingBy(ElementValue::getFieldValue,
|
||||
Collectors.mapping(ElementValue::getFieldName, Collectors.toSet())));
|
||||
|
||||
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
|
||||
|
||||
String sql = SqlParserReplaceHelper.replaceFieldNameByValue(sqlInfo.getCorrectS2SQL(), fieldValueToFieldNames);
|
||||
sqlInfo.setCorrectS2SQL(sql);
|
||||
}
|
||||
|
||||
private List<ElementValue> getLinkingValues(SemanticParseInfo semanticParseInfo) {
|
||||
Object context = semanticParseInfo.getProperties().get(Constants.CONTEXT);
|
||||
if (Objects.isNull(context)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
ParseResult parseResult = JsonUtil.toObject(JsonUtil.toString(context), ParseResult.class);
|
||||
if (Objects.isNull(parseResult) || Objects.isNull(parseResult.getLlmReq())) {
|
||||
return null;
|
||||
}
|
||||
return parseResult.getLinkingValues();
|
||||
}
|
||||
|
||||
private void updateFieldValueByLinkingValue(SemanticParseInfo semanticParseInfo) {
|
||||
List<ElementValue> linking = getLinkingValues(semanticParseInfo);
|
||||
if (CollectionUtils.isEmpty(linking)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, Map<String, String>> filedNameToValueMap = linking.stream().collect(
|
||||
Collectors.groupingBy(ElementValue::getFieldName,
|
||||
Collectors.mapping(ElementValue::getFieldValue, Collectors.toMap(
|
||||
oldValue -> oldValue,
|
||||
newValue -> newValue,
|
||||
(existingValue, newValue) -> newValue)
|
||||
)));
|
||||
|
||||
SqlInfo sqlInfo = semanticParseInfo.getSqlInfo();
|
||||
String sql = SqlParserReplaceHelper.replaceValue(sqlInfo.getCorrectS2SQL(), filedNameToValueMap, false);
|
||||
sqlInfo.setCorrectS2SQL(sql);
|
||||
}
|
||||
}
|
||||
@@ -1,26 +1,29 @@
|
||||
package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
|
||||
* Perform SQL corrections on the "Select" section in S2SQL.
|
||||
*/
|
||||
@Slf4j
|
||||
public class SelectCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void correct(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
super.correct(semanticCorrectInfo);
|
||||
String sql = semanticCorrectInfo.getSql();
|
||||
List<String> aggregateFields = SqlParserSelectHelper.getAggregateFields(sql);
|
||||
List<String> selectFields = SqlParserSelectHelper.getSelectFields(sql);
|
||||
public void doCorrect(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectS2SQL();
|
||||
List<String> aggregateFields = SqlParserSelectHelper.getAggregateFields(correctS2SQL);
|
||||
List<String> selectFields = SqlParserSelectHelper.getSelectFields(correctS2SQL);
|
||||
// If the number of aggregated fields is equal to the number of queried fields, do not add fields to select.
|
||||
if (!CollectionUtils.isEmpty(aggregateFields)
|
||||
&& !CollectionUtils.isEmpty(selectFields)
|
||||
&& aggregateFields.size() == selectFields.size()) {
|
||||
return;
|
||||
}
|
||||
addFieldsToSelect(semanticCorrectInfo, sql);
|
||||
addFieldsToSelect(semanticParseInfo, correctS2SQL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,23 +2,19 @@ package com.tencent.supersonic.chat.corrector;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaValueMap;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryFilters;
|
||||
import com.tencent.supersonic.chat.parser.llm.dsl.DSLDateHelper;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.parser.sql.llm.S2SqlDateHelper;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.enums.TimeDimensionEnum;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.DateUtils;
|
||||
import com.tencent.supersonic.common.util.StringUtil;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserAddHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserReplaceHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import net.sf.jsqlparser.JSQLParserException;
|
||||
import net.sf.jsqlparser.expression.Expression;
|
||||
@@ -27,56 +23,67 @@ import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* Perform SQL corrections on the "Where" section in S2SQL.
|
||||
*/
|
||||
@Slf4j
|
||||
public class WhereCorrector extends BaseSemanticCorrector {
|
||||
|
||||
@Override
|
||||
public void correct(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
public void doCorrect(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
|
||||
super.correct(semanticCorrectInfo);
|
||||
addDateIfNotExist(semanticParseInfo);
|
||||
|
||||
addDateIfNotExist(semanticCorrectInfo);
|
||||
parserDateDiffFunction(semanticParseInfo);
|
||||
|
||||
parserDateDiffFunction(semanticCorrectInfo);
|
||||
addQueryFilter(queryReq, semanticParseInfo);
|
||||
|
||||
addQueryFilter(semanticCorrectInfo);
|
||||
|
||||
updateFieldValueByTechName(semanticCorrectInfo);
|
||||
updateFieldValueByTechName(semanticParseInfo);
|
||||
}
|
||||
|
||||
private void addQueryFilter(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
String queryFilter = getQueryFilter(semanticCorrectInfo.getQueryFilters());
|
||||
private void addQueryFilter(QueryReq queryReq, SemanticParseInfo semanticParseInfo) {
|
||||
String queryFilter = getQueryFilter(queryReq.getQueryFilters());
|
||||
|
||||
String preSql = semanticCorrectInfo.getSql();
|
||||
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectS2SQL();
|
||||
|
||||
if (StringUtils.isNotEmpty(queryFilter)) {
|
||||
log.info("add queryFilter to preSql :{}", queryFilter);
|
||||
log.info("add queryFilter to correctS2SQL :{}", queryFilter);
|
||||
Expression expression = null;
|
||||
try {
|
||||
expression = CCJSqlParserUtil.parseCondExpression(queryFilter);
|
||||
} catch (JSQLParserException e) {
|
||||
log.error("parseCondExpression", e);
|
||||
}
|
||||
String sql = SqlParserAddHelper.addWhere(preSql, expression);
|
||||
semanticCorrectInfo.setSql(sql);
|
||||
correctS2SQL = SqlParserAddHelper.addWhere(correctS2SQL, expression);
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(correctS2SQL);
|
||||
}
|
||||
}
|
||||
|
||||
private void parserDateDiffFunction(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
String sql = semanticCorrectInfo.getSql();
|
||||
sql = SqlParserReplaceHelper.replaceFunction(sql);
|
||||
semanticCorrectInfo.setSql(sql);
|
||||
private void parserDateDiffFunction(SemanticParseInfo semanticParseInfo) {
|
||||
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectS2SQL();
|
||||
correctS2SQL = SqlParserReplaceHelper.replaceFunction(correctS2SQL);
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(correctS2SQL);
|
||||
}
|
||||
|
||||
private void addDateIfNotExist(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
String sql = semanticCorrectInfo.getSql();
|
||||
List<String> whereFields = SqlParserSelectHelper.getWhereFields(sql);
|
||||
if (CollectionUtils.isEmpty(whereFields) || !whereFields.contains(DateUtils.DATE_FIELD)) {
|
||||
String currentDate = DSLDateHelper.getReferenceDate(semanticCorrectInfo.getParseInfo().getModelId());
|
||||
sql = SqlParserAddHelper.addParenthesisToWhere(sql);
|
||||
sql = SqlParserAddHelper.addWhere(sql, DateUtils.DATE_FIELD, currentDate);
|
||||
private void addDateIfNotExist(SemanticParseInfo semanticParseInfo) {
|
||||
String correctS2SQL = semanticParseInfo.getSqlInfo().getCorrectS2SQL();
|
||||
List<String> whereFields = SqlParserSelectHelper.getWhereFields(correctS2SQL);
|
||||
if (CollectionUtils.isEmpty(whereFields) || !TimeDimensionEnum.containsZhTimeDimension(whereFields)) {
|
||||
String currentDate = S2SqlDateHelper.getReferenceDate(semanticParseInfo.getModelId());
|
||||
if (StringUtils.isNotBlank(currentDate)) {
|
||||
correctS2SQL = SqlParserAddHelper.addParenthesisToWhere(correctS2SQL);
|
||||
correctS2SQL = SqlParserAddHelper.addWhere(
|
||||
correctS2SQL, TimeDimensionEnum.DAY.getChName(), currentDate);
|
||||
}
|
||||
}
|
||||
semanticCorrectInfo.setSql(sql);
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(correctS2SQL);
|
||||
}
|
||||
|
||||
private String getQueryFilter(QueryFilters queryFilters) {
|
||||
@@ -93,21 +100,19 @@ public class WhereCorrector extends BaseSemanticCorrector {
|
||||
.collect(Collectors.joining(Constants.AND_UPPER));
|
||||
}
|
||||
|
||||
private void updateFieldValueByTechName(SemanticCorrectInfo semanticCorrectInfo) {
|
||||
private void updateFieldValueByTechName(SemanticParseInfo semanticParseInfo) {
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
Long modelId = semanticCorrectInfo.getParseInfo().getModel().getId();
|
||||
List<SchemaElement> dimensions = semanticSchema.getDimensions().stream()
|
||||
.filter(schemaElement -> modelId.equals(schemaElement.getModel()))
|
||||
.collect(Collectors.toList());
|
||||
Set<Long> modelIds = semanticParseInfo.getModel().getModelIds();
|
||||
List<SchemaElement> dimensions = semanticSchema.getDimensions(modelIds);
|
||||
|
||||
if (CollectionUtils.isEmpty(dimensions)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, Map<String, String>> aliasAndBizNameToTechName = getAliasAndBizNameToTechName(dimensions);
|
||||
String sql = SqlParserReplaceHelper.replaceValue(semanticCorrectInfo.getSql(), aliasAndBizNameToTechName);
|
||||
semanticCorrectInfo.setSql(sql);
|
||||
return;
|
||||
String correctS2SQL = SqlParserReplaceHelper.replaceValue(semanticParseInfo.getSqlInfo().getCorrectS2SQL(),
|
||||
aliasAndBizNameToTechName);
|
||||
semanticParseInfo.getSqlInfo().setCorrectS2SQL(correctS2SQL);
|
||||
}
|
||||
|
||||
private Map<String, Map<String, String>> getAliasAndBizNameToTechName(List<SchemaElement> dimensions) {
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.tencent.supersonic.chat.api.component.SchemaMapper;
|
||||
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.chat.service.SemanticService;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
|
||||
@Slf4j
|
||||
public abstract class BaseMapper implements SchemaMapper {
|
||||
|
||||
@Override
|
||||
public void map(QueryContext queryContext) {
|
||||
|
||||
String simpleName = this.getClass().getSimpleName();
|
||||
long startTime = System.currentTimeMillis();
|
||||
log.info("before {},mapInfo:{}", simpleName, queryContext.getMapInfo().getModelElementMatches());
|
||||
|
||||
try {
|
||||
doMap(queryContext);
|
||||
} catch (Exception e) {
|
||||
log.error("work error", e);
|
||||
}
|
||||
|
||||
long cost = System.currentTimeMillis() - startTime;
|
||||
log.info("after {},cost:{},mapInfo:{}", simpleName, cost, queryContext.getMapInfo().getModelElementMatches());
|
||||
}
|
||||
|
||||
public abstract void doMap(QueryContext queryContext);
|
||||
|
||||
public void addToSchemaMap(SchemaMapInfo schemaMap, Long modelId, SchemaElementMatch newElementMatch) {
|
||||
Map<Long, List<SchemaElementMatch>> modelElementMatches = schemaMap.getModelElementMatches();
|
||||
List<SchemaElementMatch> schemaElementMatches = modelElementMatches.putIfAbsent(modelId, new ArrayList<>());
|
||||
if (schemaElementMatches == null) {
|
||||
schemaElementMatches = modelElementMatches.get(modelId);
|
||||
}
|
||||
//remove duplication
|
||||
AtomicBoolean needAddNew = new AtomicBoolean(true);
|
||||
schemaElementMatches.removeIf(
|
||||
existElementMatch -> {
|
||||
SchemaElement existElement = existElementMatch.getElement();
|
||||
SchemaElement newElement = newElementMatch.getElement();
|
||||
if (existElement.equals(newElement)) {
|
||||
if (newElementMatch.getSimilarity() > existElementMatch.getSimilarity()) {
|
||||
return true;
|
||||
} else {
|
||||
needAddNew.set(false);
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
);
|
||||
if (needAddNew.get()) {
|
||||
schemaElementMatches.add(newElementMatch);
|
||||
}
|
||||
}
|
||||
|
||||
public SchemaElement getSchemaElement(Long modelId, SchemaElementType elementType, Long elementID) {
|
||||
SchemaElement element = new SchemaElement();
|
||||
SemanticService schemaService = ContextUtils.getBean(SemanticService.class);
|
||||
ModelSchema modelSchema = schemaService.getModelSchema(modelId);
|
||||
if (Objects.isNull(modelSchema)) {
|
||||
return null;
|
||||
}
|
||||
SchemaElement elementDb = modelSchema.getElement(elementType, elementID);
|
||||
if (Objects.isNull(elementDb)) {
|
||||
log.info("element is null, elementType:{},elementID:{}", elementType, elementID);
|
||||
return null;
|
||||
}
|
||||
BeanUtils.copyProperties(elementDb, element);
|
||||
element.setAlias(getAlias(elementDb));
|
||||
return element;
|
||||
}
|
||||
|
||||
public List<String> getAlias(SchemaElement element) {
|
||||
if (!SchemaElementType.VALUE.equals(element.getType())) {
|
||||
return element.getAlias();
|
||||
}
|
||||
if (org.apache.commons.collections.CollectionUtils.isNotEmpty(element.getAlias()) && StringUtils.isNotEmpty(
|
||||
element.getName())) {
|
||||
return element.getAlias().stream()
|
||||
.filter(aliasItem -> aliasItem.contains(element.getName()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
return element.getAlias();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,154 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.knowledge.utils.NatureHelper;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@Slf4j
|
||||
public abstract class BaseMatchStrategy<T> implements MatchStrategy<T> {
|
||||
|
||||
@Autowired
|
||||
private MapperHelper mapperHelper;
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<T>> match(QueryContext queryContext, List<Term> terms, Set<Long> detectModelIds) {
|
||||
String text = queryContext.getRequest().getQueryText();
|
||||
if (Objects.isNull(terms) || StringUtils.isEmpty(text)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("terms:{},,detectModelIds:{}", terms, detectModelIds);
|
||||
|
||||
List<T> detects = detect(queryContext, terms, detectModelIds);
|
||||
Map<MatchText, List<T>> result = new HashMap<>();
|
||||
|
||||
result.put(MatchText.builder().regText(text).detectSegment(text).build(), detects);
|
||||
return result;
|
||||
}
|
||||
|
||||
public List<T> detect(QueryContext queryContext, List<Term> terms, Set<Long> detectModelIds) {
|
||||
Map<Integer, Integer> regOffsetToLength = getRegOffsetToLength(terms);
|
||||
String text = queryContext.getRequest().getQueryText();
|
||||
Set<T> results = new HashSet<>();
|
||||
|
||||
Set<String> detectSegments = new HashSet<>();
|
||||
|
||||
for (Integer startIndex = 0; startIndex <= text.length() - 1; ) {
|
||||
|
||||
for (Integer index = startIndex; index <= text.length(); ) {
|
||||
int offset = mapperHelper.getStepOffset(terms, startIndex);
|
||||
index = mapperHelper.getStepIndex(regOffsetToLength, index);
|
||||
if (index <= text.length()) {
|
||||
String detectSegment = text.substring(startIndex, index);
|
||||
detectSegments.add(detectSegment);
|
||||
detectByStep(queryContext, results, detectModelIds, startIndex, index, offset);
|
||||
}
|
||||
}
|
||||
startIndex = mapperHelper.getStepIndex(regOffsetToLength, startIndex);
|
||||
}
|
||||
detectByBatch(queryContext, results, detectModelIds, detectSegments);
|
||||
return new ArrayList<>(results);
|
||||
}
|
||||
|
||||
protected void detectByBatch(QueryContext queryContext, Set<T> results, Set<Long> detectModelIds,
|
||||
Set<String> detectSegments) {
|
||||
return;
|
||||
}
|
||||
|
||||
public Map<Integer, Integer> getRegOffsetToLength(List<Term> terms) {
|
||||
return terms.stream().sorted(Comparator.comparing(Term::length))
|
||||
.collect(Collectors.toMap(Term::getOffset, term -> term.word.length(), (value1, value2) -> value2));
|
||||
}
|
||||
|
||||
public void selectResultInOneRound(Set<T> existResults, List<T> oneRoundResults) {
|
||||
if (CollectionUtils.isEmpty(oneRoundResults)) {
|
||||
return;
|
||||
}
|
||||
for (T oneRoundResult : oneRoundResults) {
|
||||
if (existResults.contains(oneRoundResult)) {
|
||||
boolean isDeleted = existResults.removeIf(
|
||||
existResult -> {
|
||||
boolean delete = needDelete(oneRoundResult, existResult);
|
||||
if (delete) {
|
||||
log.info("deleted existResult:{}", existResult);
|
||||
}
|
||||
return delete;
|
||||
}
|
||||
);
|
||||
if (isDeleted) {
|
||||
log.info("deleted, add oneRoundResult:{}", oneRoundResult);
|
||||
existResults.add(oneRoundResult);
|
||||
}
|
||||
} else {
|
||||
existResults.add(oneRoundResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public List<T> getMatches(QueryContext queryContext, List<Term> terms) {
|
||||
Set<Long> detectModelIds = mapperHelper.getModelIds(queryContext.getRequest());
|
||||
terms = filterByModelIds(terms, detectModelIds);
|
||||
Map<MatchText, List<T>> matchResult = match(queryContext, terms, detectModelIds);
|
||||
List<T> matches = new ArrayList<>();
|
||||
if (Objects.isNull(matchResult)) {
|
||||
return matches;
|
||||
}
|
||||
Optional<List<T>> first = matchResult.entrySet().stream()
|
||||
.filter(entry -> CollectionUtils.isNotEmpty(entry.getValue()))
|
||||
.map(entry -> entry.getValue()).findFirst();
|
||||
|
||||
if (first.isPresent()) {
|
||||
matches = first.get();
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
public List<Term> filterByModelIds(List<Term> terms, Set<Long> detectModelIds) {
|
||||
logTerms(terms);
|
||||
if (CollectionUtils.isNotEmpty(detectModelIds)) {
|
||||
terms = terms.stream().filter(term -> {
|
||||
Long modelId = NatureHelper.getModelId(term.getNature().toString());
|
||||
if (Objects.nonNull(modelId)) {
|
||||
return detectModelIds.contains(modelId);
|
||||
}
|
||||
return false;
|
||||
}).collect(Collectors.toList());
|
||||
log.info("terms filter by modelIds:{}", detectModelIds);
|
||||
logTerms(terms);
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
|
||||
public void logTerms(List<Term> terms) {
|
||||
if (CollectionUtils.isEmpty(terms)) {
|
||||
return;
|
||||
}
|
||||
for (Term term : terms) {
|
||||
log.debug("word:{},nature:{},frequency:{}", term.word, term.nature.toString(), term.getFrequency());
|
||||
}
|
||||
}
|
||||
|
||||
public abstract boolean needDelete(T oneRoundResult, T existResult);
|
||||
|
||||
public abstract String getMapKey(T a);
|
||||
|
||||
public abstract void detectByStep(QueryContext queryContext, Set<T> results,
|
||||
Set<Long> detectModelIds, Integer startIndex, Integer index, int offset);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,130 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DatabaseMapResult;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/**
|
||||
* DatabaseMatchStrategy uses SQL LIKE operator to match schema elements.
|
||||
* It currently supports fuzzy matching against names and aliases.
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class DatabaseMatchStrategy extends BaseMatchStrategy<DatabaseMapResult> {
|
||||
|
||||
@Autowired
|
||||
private OptimizationConfig optimizationConfig;
|
||||
@Autowired
|
||||
private MapperHelper mapperHelper;
|
||||
@Autowired
|
||||
private SchemaService schemaService;
|
||||
private List<SchemaElement> allElements;
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<DatabaseMapResult>> match(QueryContext queryContext, List<Term> terms,
|
||||
Set<Long> detectModelIds) {
|
||||
this.allElements = getSchemaElements();
|
||||
return super.match(queryContext, terms, detectModelIds);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needDelete(DatabaseMapResult oneRoundResult, DatabaseMapResult existResult) {
|
||||
return getMapKey(oneRoundResult).equals(getMapKey(existResult))
|
||||
&& existResult.getDetectWord().length() < oneRoundResult.getDetectWord().length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMapKey(DatabaseMapResult a) {
|
||||
return a.getName() + Constants.UNDERLINE + a.getSchemaElement().getId()
|
||||
+ Constants.UNDERLINE + a.getSchemaElement().getName();
|
||||
}
|
||||
|
||||
public void detectByStep(QueryContext queryContext, Set<DatabaseMapResult> existResults, Set<Long> detectModelIds,
|
||||
Integer startIndex, Integer index, int offset) {
|
||||
String detectSegment = queryContext.getRequest().getQueryText().substring(startIndex, index);
|
||||
if (StringUtils.isBlank(detectSegment)) {
|
||||
return;
|
||||
}
|
||||
Set<Long> modelIds = mapperHelper.getModelIds(queryContext.getRequest());
|
||||
|
||||
Double metricDimensionThresholdConfig = getThreshold(queryContext);
|
||||
|
||||
Map<String, Set<SchemaElement>> nameToItems = getNameToItems(allElements);
|
||||
|
||||
for (Entry<String, Set<SchemaElement>> entry : nameToItems.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
if (!name.contains(detectSegment)
|
||||
|| mapperHelper.getSimilarity(detectSegment, name) < metricDimensionThresholdConfig) {
|
||||
continue;
|
||||
}
|
||||
Set<SchemaElement> schemaElements = entry.getValue();
|
||||
if (!CollectionUtils.isEmpty(modelIds)) {
|
||||
schemaElements = schemaElements.stream()
|
||||
.filter(schemaElement -> modelIds.contains(schemaElement.getModel()))
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
for (SchemaElement schemaElement : schemaElements) {
|
||||
DatabaseMapResult databaseMapResult = new DatabaseMapResult();
|
||||
databaseMapResult.setDetectWord(detectSegment);
|
||||
databaseMapResult.setName(schemaElement.getName());
|
||||
databaseMapResult.setSchemaElement(schemaElement);
|
||||
existResults.add(databaseMapResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<SchemaElement> getSchemaElements() {
|
||||
List<SchemaElement> allElements = new ArrayList<>();
|
||||
allElements.addAll(schemaService.getSemanticSchema().getDimensions());
|
||||
allElements.addAll(schemaService.getSemanticSchema().getMetrics());
|
||||
return allElements;
|
||||
}
|
||||
|
||||
private Double getThreshold(QueryContext queryContext) {
|
||||
Double metricDimensionThresholdConfig = optimizationConfig.getMetricDimensionThresholdConfig();
|
||||
Double metricDimensionMinThresholdConfig = optimizationConfig.getMetricDimensionMinThresholdConfig();
|
||||
|
||||
Map<Long, List<SchemaElementMatch>> modelElementMatches = queryContext.getMapInfo().getModelElementMatches();
|
||||
|
||||
boolean existElement = modelElementMatches.entrySet().stream().anyMatch(entry -> entry.getValue().size() >= 1);
|
||||
|
||||
if (!existElement) {
|
||||
double halfThreshold = metricDimensionThresholdConfig / 2;
|
||||
|
||||
metricDimensionThresholdConfig = halfThreshold >= metricDimensionMinThresholdConfig ? halfThreshold
|
||||
: metricDimensionMinThresholdConfig;
|
||||
log.info("ModelElementMatches:{} , not exist Element metricDimensionThresholdConfig reduce by half:{}",
|
||||
modelElementMatches, metricDimensionThresholdConfig);
|
||||
}
|
||||
return metricDimensionThresholdConfig;
|
||||
}
|
||||
|
||||
private Map<String, Set<SchemaElement>> getNameToItems(List<SchemaElement> models) {
|
||||
return models.stream().collect(
|
||||
Collectors.toMap(SchemaElement::getName, a -> {
|
||||
Set<SchemaElement> result = new HashSet<>();
|
||||
result.add(a);
|
||||
return result;
|
||||
}, (k1, k2) -> {
|
||||
k1.addAll(k2);
|
||||
return k1;
|
||||
}));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.embedding.Retrieval;
|
||||
import com.tencent.supersonic.knowledge.dictionary.EmbeddingResult;
|
||||
import com.tencent.supersonic.knowledge.dictionary.builder.BaseWordBuilder;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
/***
|
||||
* A mapper that recognizes schema elements with vector embedding.
|
||||
*/
|
||||
@Slf4j
|
||||
public class EmbeddingMapper extends BaseMapper {
|
||||
|
||||
@Override
|
||||
public void doMap(QueryContext queryContext) {
|
||||
//1. query from embedding by queryText
|
||||
String queryText = queryContext.getRequest().getQueryText();
|
||||
List<Term> terms = HanlpHelper.getTerms(queryText);
|
||||
|
||||
EmbeddingMatchStrategy matchStrategy = ContextUtils.getBean(EmbeddingMatchStrategy.class);
|
||||
List<EmbeddingResult> matchResults = matchStrategy.getMatches(queryContext, terms);
|
||||
|
||||
HanlpHelper.transLetterOriginal(matchResults);
|
||||
|
||||
//2. build SchemaElementMatch by info
|
||||
for (EmbeddingResult matchResult : matchResults) {
|
||||
Long elementId = Retrieval.getLongId(matchResult.getId());
|
||||
|
||||
SchemaElement schemaElement = JSONObject.parseObject(JSONObject.toJSONString(matchResult.getMetadata()),
|
||||
SchemaElement.class);
|
||||
|
||||
String modelIdStr = matchResult.getMetadata().get("modelId");
|
||||
if (StringUtils.isBlank(modelIdStr)) {
|
||||
continue;
|
||||
}
|
||||
long modelId = Long.parseLong(modelIdStr);
|
||||
schemaElement = getSchemaElement(modelId, schemaElement.getType(), elementId);
|
||||
if (schemaElement == null) {
|
||||
continue;
|
||||
}
|
||||
SchemaElementMatch schemaElementMatch = SchemaElementMatch.builder()
|
||||
.element(schemaElement)
|
||||
.frequency(BaseWordBuilder.DEFAULT_FREQUENCY)
|
||||
.word(matchResult.getName())
|
||||
.similarity(1 - matchResult.getDistance())
|
||||
.detectWord(matchResult.getDetectWord())
|
||||
.build();
|
||||
//3. add to mapInfo
|
||||
addToSchemaMap(queryContext.getMapInfo(), modelId, schemaElementMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,137 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.util.ComponentFactory;
|
||||
import com.tencent.supersonic.common.util.embedding.Retrieval;
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQuery;
|
||||
import com.tencent.supersonic.common.util.embedding.RetrieveQueryResult;
|
||||
import com.tencent.supersonic.common.util.embedding.S2EmbeddingStore;
|
||||
import com.tencent.supersonic.knowledge.dictionary.EmbeddingResult;
|
||||
import com.tencent.supersonic.semantic.model.domain.listener.MetaEmbeddingListener;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* EmbeddingMatchStrategy uses vector database to perform
|
||||
* similarity search against the embeddings of schema elements.
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class EmbeddingMatchStrategy extends BaseMatchStrategy<EmbeddingResult> {
|
||||
|
||||
@Autowired
|
||||
private OptimizationConfig optimizationConfig;
|
||||
|
||||
private S2EmbeddingStore s2EmbeddingStore = ComponentFactory.getS2EmbeddingStore();
|
||||
|
||||
@Override
|
||||
public boolean needDelete(EmbeddingResult oneRoundResult, EmbeddingResult existResult) {
|
||||
return getMapKey(oneRoundResult).equals(getMapKey(existResult))
|
||||
&& existResult.getDistance() > oneRoundResult.getDistance();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMapKey(EmbeddingResult a) {
|
||||
return a.getName() + Constants.UNDERLINE + a.getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void detectByBatch(QueryContext queryContext, Set<EmbeddingResult> results, Set<Long> detectModelIds,
|
||||
Set<String> detectSegments) {
|
||||
|
||||
List<String> queryTextsList = detectSegments.stream()
|
||||
.map(detectSegment -> detectSegment.trim())
|
||||
.filter(detectSegment -> StringUtils.isNotBlank(detectSegment)
|
||||
&& detectSegment.length() >= optimizationConfig.getEmbeddingMapperWordMin()
|
||||
&& detectSegment.length() <= optimizationConfig.getEmbeddingMapperWordMax())
|
||||
.collect(Collectors.toList());
|
||||
|
||||
List<List<String>> queryTextsSubList = Lists.partition(queryTextsList,
|
||||
optimizationConfig.getEmbeddingMapperBatch());
|
||||
|
||||
for (List<String> queryTextsSub : queryTextsSubList) {
|
||||
detectByQueryTextsSub(results, detectModelIds, queryTextsSub);
|
||||
}
|
||||
}
|
||||
|
||||
private void detectByQueryTextsSub(Set<EmbeddingResult> results, Set<Long> detectModelIds,
|
||||
List<String> queryTextsSub) {
|
||||
int embeddingNumber = optimizationConfig.getEmbeddingMapperNumber();
|
||||
Double distance = optimizationConfig.getEmbeddingMapperDistanceThreshold();
|
||||
Map<String, String> filterCondition = null;
|
||||
// step1. build query params
|
||||
// if only one modelId, add to filterCondition
|
||||
if (CollectionUtils.isNotEmpty(detectModelIds) && detectModelIds.size() == 1) {
|
||||
filterCondition = new HashMap<>();
|
||||
filterCondition.put("modelId", detectModelIds.stream().findFirst().get().toString());
|
||||
}
|
||||
|
||||
RetrieveQuery retrieveQuery = RetrieveQuery.builder()
|
||||
.queryTextsList(queryTextsSub)
|
||||
.filterCondition(filterCondition)
|
||||
.queryEmbeddings(null)
|
||||
.build();
|
||||
// step2. retrieveQuery by detectSegment
|
||||
List<RetrieveQueryResult> retrieveQueryResults = s2EmbeddingStore.retrieveQuery(
|
||||
MetaEmbeddingListener.COLLECTION_NAME, retrieveQuery, embeddingNumber);
|
||||
|
||||
if (CollectionUtils.isEmpty(retrieveQueryResults)) {
|
||||
return;
|
||||
}
|
||||
// step3. build EmbeddingResults. filter by modelId
|
||||
List<EmbeddingResult> collect = retrieveQueryResults.stream()
|
||||
.map(retrieveQueryResult -> {
|
||||
List<Retrieval> retrievals = retrieveQueryResult.getRetrieval();
|
||||
if (CollectionUtils.isNotEmpty(retrievals)) {
|
||||
retrievals.removeIf(retrieval -> retrieval.getDistance() > distance.doubleValue());
|
||||
if (CollectionUtils.isNotEmpty(detectModelIds)) {
|
||||
retrievals.removeIf(retrieval -> {
|
||||
String modelIdStr = retrieval.getMetadata().get("modelId").toString();
|
||||
if (StringUtils.isBlank(modelIdStr)) {
|
||||
return true;
|
||||
}
|
||||
return detectModelIds.contains(Long.parseLong(modelIdStr));
|
||||
});
|
||||
}
|
||||
}
|
||||
return retrieveQueryResult;
|
||||
})
|
||||
.filter(retrieveQueryResult -> CollectionUtils.isNotEmpty(retrieveQueryResult.getRetrieval()))
|
||||
.flatMap(retrieveQueryResult -> retrieveQueryResult.getRetrieval().stream()
|
||||
.map(retrieval -> {
|
||||
EmbeddingResult embeddingResult = new EmbeddingResult();
|
||||
BeanUtils.copyProperties(retrieval, embeddingResult);
|
||||
embeddingResult.setDetectWord(retrieveQueryResult.getQuery());
|
||||
embeddingResult.setName(retrieval.getQuery());
|
||||
return embeddingResult;
|
||||
}))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// step4. select mapResul in one round
|
||||
int roundNumber = optimizationConfig.getEmbeddingMapperRoundNumber() * queryTextsSub.size();
|
||||
List<EmbeddingResult> oneRoundResults = collect.stream()
|
||||
.sorted(Comparator.comparingDouble(EmbeddingResult::getDistance))
|
||||
.limit(roundNumber)
|
||||
.collect(Collectors.toList());
|
||||
selectResultInOneRound(results, oneRoundResults);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void detectByStep(QueryContext queryContext, Set<EmbeddingResult> existResults, Set<Long> detectModelIds,
|
||||
Integer startIndex, Integer index, int offset) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -1,27 +1,29 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
|
||||
import com.tencent.supersonic.chat.api.component.SchemaMapper;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.chat.service.SemanticService;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
/**
|
||||
* A mapper capable of converting the VALUE of entity dimension values into ID types.
|
||||
*/
|
||||
@Slf4j
|
||||
public class EntityMapper implements SchemaMapper {
|
||||
public class EntityMapper extends BaseMapper {
|
||||
|
||||
@Override
|
||||
public void map(QueryContext queryContext) {
|
||||
public void doMap(QueryContext queryContext) {
|
||||
SchemaMapInfo schemaMapInfo = queryContext.getMapInfo();
|
||||
for (Long modelId : schemaMapInfo.getMatchedModels()) {
|
||||
List<SchemaElementMatch> schemaElementMatchList = schemaMapInfo.getMatchedElements(modelId);
|
||||
@@ -32,8 +34,9 @@ public class EntityMapper implements SchemaMapper {
|
||||
if (entity == null || entity.getId() == null) {
|
||||
continue;
|
||||
}
|
||||
List<SchemaElementMatch> valueSchemaElements = schemaElementMatchList.stream().filter(schemaElementMatch ->
|
||||
SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType()))
|
||||
List<SchemaElementMatch> valueSchemaElements = schemaElementMatchList.stream()
|
||||
.filter(schemaElementMatch ->
|
||||
SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType()))
|
||||
.collect(Collectors.toList());
|
||||
for (SchemaElementMatch schemaElementMatch : valueSchemaElements) {
|
||||
if (!entity.getId().equals(schemaElementMatch.getElement().getId())) {
|
||||
@@ -51,7 +54,7 @@ public class EntityMapper implements SchemaMapper {
|
||||
}
|
||||
|
||||
private boolean checkExistSameEntitySchemaElements(SchemaElementMatch valueSchemaElementMatch,
|
||||
List<SchemaElementMatch> schemaElementMatchList) {
|
||||
List<SchemaElementMatch> schemaElementMatchList) {
|
||||
List<SchemaElementMatch> entitySchemaElements = schemaElementMatchList.stream().filter(schemaElementMatch ->
|
||||
SchemaElementType.ENTITY.equals(schemaElementMatch.getElement().getType()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
@@ -1,179 +0,0 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.component.SchemaMapper;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
@Slf4j
|
||||
public class FuzzyNameMapper implements SchemaMapper {
|
||||
|
||||
@Override
|
||||
public void map(QueryContext queryContext) {
|
||||
|
||||
log.debug("before db mapper,mapInfo:{}", queryContext.getMapInfo());
|
||||
|
||||
List<Term> terms = HanlpHelper.getTerms(queryContext.getRequest().getQueryText());
|
||||
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
|
||||
detectAndAddToSchema(queryContext, terms, semanticSchema.getDimensions(), SchemaElementType.DIMENSION);
|
||||
|
||||
detectAndAddToSchema(queryContext, terms, semanticSchema.getMetrics(), SchemaElementType.METRIC);
|
||||
|
||||
log.debug("after db mapper,mapInfo:{}", queryContext.getMapInfo());
|
||||
}
|
||||
|
||||
private void detectAndAddToSchema(QueryContext queryContext, List<Term> terms, List<SchemaElement> models,
|
||||
SchemaElementType schemaElementType) {
|
||||
try {
|
||||
|
||||
Map<String, Set<SchemaElement>> modelResultSet = getResultSet(queryContext, terms, models);
|
||||
|
||||
addToSchemaMapInfo(modelResultSet, queryContext.getMapInfo(), schemaElementType);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("detectAndAddToSchema error", e);
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Set<SchemaElement>> getResultSet(QueryContext queryContext, List<Term> terms,
|
||||
List<SchemaElement> models) {
|
||||
|
||||
String queryText = queryContext.getRequest().getQueryText();
|
||||
|
||||
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
||||
Set<Long> modelIds = mapperHelper.getModelIds(queryContext.getRequest());
|
||||
|
||||
Double metricDimensionThresholdConfig = getThreshold(queryContext, mapperHelper);
|
||||
|
||||
Map<String, Set<SchemaElement>> nameToItems = getNameToItems(models);
|
||||
|
||||
Map<Integer, Integer> regOffsetToLength = terms.stream().sorted(Comparator.comparing(Term::length))
|
||||
.collect(Collectors.toMap(Term::getOffset, term -> term.word.length(), (value1, value2) -> value2));
|
||||
|
||||
Map<String, Set<SchemaElement>> modelResultSet = new HashMap<>();
|
||||
for (Integer startIndex = 0; startIndex <= queryText.length() - 1; ) {
|
||||
for (Integer endIndex = startIndex; endIndex <= queryText.length(); ) {
|
||||
endIndex = mapperHelper.getStepIndex(regOffsetToLength, endIndex);
|
||||
if (endIndex > queryText.length()) {
|
||||
continue;
|
||||
}
|
||||
String detectSegment = queryText.substring(startIndex, endIndex);
|
||||
|
||||
for (Entry<String, Set<SchemaElement>> entry : nameToItems.entrySet()) {
|
||||
String name = entry.getKey();
|
||||
Set<SchemaElement> schemaElements = entry.getValue();
|
||||
if (!name.contains(detectSegment)
|
||||
|| mapperHelper.getSimilarity(detectSegment, name) < metricDimensionThresholdConfig) {
|
||||
continue;
|
||||
}
|
||||
if (!CollectionUtils.isEmpty(modelIds)) {
|
||||
schemaElements = schemaElements.stream()
|
||||
.filter(schemaElement -> modelIds.contains(schemaElement.getModel()))
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
Set<SchemaElement> preSchemaElements = modelResultSet.putIfAbsent(detectSegment, schemaElements);
|
||||
if (Objects.nonNull(preSchemaElements)) {
|
||||
preSchemaElements.addAll(schemaElements);
|
||||
}
|
||||
}
|
||||
}
|
||||
startIndex = mapperHelper.getStepIndex(regOffsetToLength, startIndex);
|
||||
}
|
||||
return modelResultSet;
|
||||
}
|
||||
|
||||
private Double getThreshold(QueryContext queryContext, MapperHelper mapperHelper) {
|
||||
|
||||
OptimizationConfig optimizationConfig = ContextUtils.getBean(OptimizationConfig.class);
|
||||
Double metricDimensionThresholdConfig = optimizationConfig.getMetricDimensionThresholdConfig();
|
||||
Double metricDimensionMinThresholdConfig = optimizationConfig.getMetricDimensionMinThresholdConfig();
|
||||
|
||||
Map<Long, List<SchemaElementMatch>> modelElementMatches = queryContext.getMapInfo()
|
||||
.getModelElementMatches();
|
||||
boolean existElement = modelElementMatches.entrySet().stream()
|
||||
.anyMatch(entry -> entry.getValue().size() >= 1);
|
||||
|
||||
if (!existElement) {
|
||||
double halfThreshold = metricDimensionThresholdConfig / 2;
|
||||
|
||||
metricDimensionThresholdConfig = halfThreshold >= metricDimensionMinThresholdConfig ? halfThreshold
|
||||
: metricDimensionMinThresholdConfig;
|
||||
log.info("ModelElementMatches:{} , not exist Element metricDimensionThresholdConfig reduce by half:{}",
|
||||
modelElementMatches, metricDimensionThresholdConfig);
|
||||
}
|
||||
return metricDimensionThresholdConfig;
|
||||
}
|
||||
|
||||
private Map<String, Set<SchemaElement>> getNameToItems(List<SchemaElement> models) {
|
||||
return models.stream().collect(
|
||||
Collectors.toMap(SchemaElement::getName, a -> {
|
||||
Set<SchemaElement> result = new HashSet<>();
|
||||
result.add(a);
|
||||
return result;
|
||||
}, (k1, k2) -> {
|
||||
k1.addAll(k2);
|
||||
return k1;
|
||||
}));
|
||||
}
|
||||
|
||||
private void addToSchemaMapInfo(Map<String, Set<SchemaElement>> mapResultRowSet, SchemaMapInfo schemaMap,
|
||||
SchemaElementType schemaElementType) {
|
||||
if (Objects.isNull(mapResultRowSet) || mapResultRowSet.size() <= 0) {
|
||||
return;
|
||||
}
|
||||
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
||||
|
||||
for (Map.Entry<String, Set<SchemaElement>> entry : mapResultRowSet.entrySet()) {
|
||||
String detectWord = entry.getKey();
|
||||
Set<SchemaElement> schemaElements = entry.getValue();
|
||||
for (SchemaElement schemaElement : schemaElements) {
|
||||
|
||||
List<SchemaElementMatch> elements = schemaMap.getMatchedElements(schemaElement.getModel());
|
||||
if (CollectionUtils.isEmpty(elements)) {
|
||||
elements = new ArrayList<>();
|
||||
schemaMap.setMatchedElements(schemaElement.getModel(), elements);
|
||||
}
|
||||
Set<Long> regElementSet = elements.stream()
|
||||
.filter(elementMatch -> schemaElementType.equals(elementMatch.getElement().getType()))
|
||||
.map(elementMatch -> elementMatch.getElement().getId())
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
if (regElementSet.contains(schemaElement.getId())) {
|
||||
continue;
|
||||
}
|
||||
SchemaElementMatch schemaElementMatch = SchemaElementMatch.builder()
|
||||
.element(schemaElement)
|
||||
.word(schemaElement.getName())
|
||||
.detectWord(detectWord)
|
||||
.frequency(10000L)
|
||||
.similarity(mapperHelper.getSimilarity(detectWord, schemaElement.getName()))
|
||||
.build();
|
||||
log.info("schemaElementType:{},add to schema, elementMatch {}", schemaElementType, schemaElementMatch);
|
||||
elements.add(schemaElementMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,162 +0,0 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.component.SchemaMapper;
|
||||
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.chat.service.SemanticService;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.knowledge.dictionary.MapResult;
|
||||
import com.tencent.supersonic.knowledge.dictionary.builder.BaseWordBuilder;
|
||||
import com.tencent.supersonic.knowledge.dictionary.builder.WordBuilderFactory;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
import com.tencent.supersonic.knowledge.utils.NatureHelper;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.BeanUtils;
|
||||
|
||||
@Slf4j
|
||||
public class HanlpDictMapper implements SchemaMapper {
|
||||
|
||||
@Override
|
||||
public void map(QueryContext queryContext) {
|
||||
|
||||
String queryText = queryContext.getRequest().getQueryText();
|
||||
List<Term> terms = HanlpHelper.getTerms(queryText);
|
||||
|
||||
QueryMatchStrategy matchStrategy = ContextUtils.getBean(QueryMatchStrategy.class);
|
||||
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
||||
Set<Long> detectModelIds = mapperHelper.getModelIds(queryContext.getRequest());
|
||||
|
||||
terms = filterByModelIds(terms, detectModelIds);
|
||||
|
||||
Map<MatchText, List<MapResult>> matchResult = matchStrategy.match(queryContext.getRequest(), terms,
|
||||
detectModelIds);
|
||||
|
||||
List<MapResult> matches = getMatches(matchResult);
|
||||
|
||||
HanlpHelper.transLetterOriginal(matches);
|
||||
|
||||
log.info("queryContext:{},matches:{}", queryContext, matches);
|
||||
|
||||
convertTermsToSchemaMapInfo(matches, queryContext.getMapInfo(), terms);
|
||||
}
|
||||
|
||||
private List<Term> filterByModelIds(List<Term> terms, Set<Long> detectModelIds) {
|
||||
for (Term term : terms) {
|
||||
log.info("before word:{},nature:{},frequency:{}", term.word, term.nature.toString(), term.getFrequency());
|
||||
}
|
||||
if (CollectionUtils.isNotEmpty(detectModelIds)) {
|
||||
terms = terms.stream().filter(term -> {
|
||||
Long modelId = NatureHelper.getModelId(term.getNature().toString());
|
||||
if (Objects.nonNull(modelId)) {
|
||||
return detectModelIds.contains(modelId);
|
||||
}
|
||||
return false;
|
||||
}).collect(Collectors.toList());
|
||||
}
|
||||
for (Term term : terms) {
|
||||
log.info("after filter word:{},nature:{},frequency:{}", term.word, term.nature.toString(),
|
||||
term.getFrequency());
|
||||
}
|
||||
return terms;
|
||||
}
|
||||
|
||||
|
||||
private void convertTermsToSchemaMapInfo(List<MapResult> mapResults, SchemaMapInfo schemaMap, List<Term> terms) {
|
||||
if (CollectionUtils.isEmpty(mapResults)) {
|
||||
return;
|
||||
}
|
||||
|
||||
Map<String, Long> wordNatureToFrequency = terms.stream().collect(
|
||||
Collectors.toMap(entry -> entry.getWord() + entry.getNature(),
|
||||
term -> Long.valueOf(term.getFrequency()), (value1, value2) -> value2));
|
||||
|
||||
for (MapResult mapResult : mapResults) {
|
||||
for (String nature : mapResult.getNatures()) {
|
||||
Long modelId = NatureHelper.getModelId(nature);
|
||||
if (Objects.isNull(modelId)) {
|
||||
continue;
|
||||
}
|
||||
SchemaElementType elementType = NatureHelper.convertToElementType(nature);
|
||||
if (Objects.isNull(elementType)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
SemanticService schemaService = ContextUtils.getBean(SemanticService.class);
|
||||
ModelSchema modelSchema = schemaService.getModelSchema(modelId);
|
||||
|
||||
BaseWordBuilder baseWordBuilder = WordBuilderFactory.get(DictWordType.getNatureType(nature));
|
||||
Long elementID = baseWordBuilder.getElementID(nature);
|
||||
Long frequency = wordNatureToFrequency.get(mapResult.getName() + nature);
|
||||
|
||||
SchemaElement elementDb = modelSchema.getElement(elementType, elementID);
|
||||
if (Objects.isNull(elementDb)) {
|
||||
log.info("element is null, elementType:{},elementID:{}", elementType, elementID);
|
||||
continue;
|
||||
}
|
||||
SchemaElement element = new SchemaElement();
|
||||
BeanUtils.copyProperties(elementDb, element);
|
||||
element.setAlias(getAlias(elementDb));
|
||||
if (element.getType().equals(SchemaElementType.VALUE)) {
|
||||
element.setName(mapResult.getName());
|
||||
}
|
||||
SchemaElementMatch schemaElementMatch = SchemaElementMatch.builder()
|
||||
.element(element)
|
||||
.frequency(frequency)
|
||||
.word(mapResult.getName())
|
||||
.similarity(mapResult.getSimilarity())
|
||||
.detectWord(mapResult.getDetectWord())
|
||||
.build();
|
||||
|
||||
Map<Long, List<SchemaElementMatch>> modelElementMatches = schemaMap.getModelElementMatches();
|
||||
List<SchemaElementMatch> schemaElementMatches = modelElementMatches.putIfAbsent(modelId,
|
||||
new ArrayList<>());
|
||||
if (schemaElementMatches == null) {
|
||||
schemaElementMatches = modelElementMatches.get(modelId);
|
||||
}
|
||||
schemaElementMatches.add(schemaElementMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<MapResult> getMatches(Map<MatchText, List<MapResult>> matchResult) {
|
||||
List<MapResult> matches = new ArrayList<>();
|
||||
if (Objects.isNull(matchResult)) {
|
||||
return matches;
|
||||
}
|
||||
Optional<List<MapResult>> first = matchResult.entrySet().stream()
|
||||
.filter(entry -> CollectionUtils.isNotEmpty(entry.getValue()))
|
||||
.map(entry -> entry.getValue()).findFirst();
|
||||
|
||||
if (first.isPresent()) {
|
||||
matches = first.get();
|
||||
}
|
||||
return matches;
|
||||
}
|
||||
|
||||
public List<String> getAlias(SchemaElement element) {
|
||||
if (!SchemaElementType.VALUE.equals(element.getType())) {
|
||||
return element.getAlias();
|
||||
}
|
||||
if (CollectionUtils.isNotEmpty(element.getAlias()) && StringUtils.isNotEmpty(element.getName())) {
|
||||
return element.getAlias().stream()
|
||||
.filter(aliasItem -> aliasItem.contains(element.getName()))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
return element.getAlias();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
|
||||
import com.tencent.supersonic.knowledge.service.SearchService;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* HanlpDictMatchStrategy uses <a href="https://www.hanlp.com/">HanLP</a> to
|
||||
* match schema elements. It currently supports prefix and suffix matching
|
||||
* against names, values and aliases.
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class HanlpDictMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
|
||||
@Autowired
|
||||
private MapperHelper mapperHelper;
|
||||
|
||||
@Autowired
|
||||
private OptimizationConfig optimizationConfig;
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<HanlpMapResult>> match(QueryContext queryContext, List<Term> terms,
|
||||
Set<Long> detectModelIds) {
|
||||
QueryReq queryReq = queryContext.getRequest();
|
||||
String text = queryReq.getQueryText();
|
||||
if (Objects.isNull(terms) || StringUtils.isEmpty(text)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("retryCount:{},terms:{},,detectModelIds:{}", terms, detectModelIds);
|
||||
|
||||
List<HanlpMapResult> detects = detect(queryContext, terms, detectModelIds);
|
||||
Map<MatchText, List<HanlpMapResult>> result = new HashMap<>();
|
||||
|
||||
result.put(MatchText.builder().regText(text).detectSegment(text).build(), detects);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needDelete(HanlpMapResult oneRoundResult, HanlpMapResult existResult) {
|
||||
return getMapKey(oneRoundResult).equals(getMapKey(existResult))
|
||||
&& existResult.getDetectWord().length() < oneRoundResult.getDetectWord().length();
|
||||
}
|
||||
|
||||
public void detectByStep(QueryContext queryContext, Set<HanlpMapResult> existResults, Set<Long> detectModelIds,
|
||||
Integer startIndex, Integer index, int offset) {
|
||||
QueryReq queryReq = queryContext.getRequest();
|
||||
String text = queryReq.getQueryText();
|
||||
Integer agentId = queryReq.getAgentId();
|
||||
String detectSegment = text.substring(startIndex, index);
|
||||
|
||||
// step1. pre search
|
||||
Integer oneDetectionMaxSize = optimizationConfig.getOneDetectionMaxSize();
|
||||
LinkedHashSet<HanlpMapResult> hanlpMapResults = SearchService.prefixSearch(detectSegment, oneDetectionMaxSize,
|
||||
agentId,
|
||||
detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
// step2. suffix search
|
||||
LinkedHashSet<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(detectSegment,
|
||||
oneDetectionMaxSize, agentId, detectModelIds).stream()
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
hanlpMapResults.addAll(suffixHanlpMapResults);
|
||||
|
||||
if (CollectionUtils.isEmpty(hanlpMapResults)) {
|
||||
return;
|
||||
}
|
||||
// step3. merge pre/suffix result
|
||||
hanlpMapResults = hanlpMapResults.stream().sorted((a, b) -> -(b.getName().length() - a.getName().length()))
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
// step4. filter by similarity
|
||||
hanlpMapResults = hanlpMapResults.stream()
|
||||
.filter(term -> mapperHelper.getSimilarity(detectSegment, term.getName())
|
||||
>= mapperHelper.getThresholdMatch(term.getNatures()))
|
||||
.filter(term -> CollectionUtils.isNotEmpty(term.getNatures()))
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
log.info("after isSimilarity parseResults:{}", hanlpMapResults);
|
||||
|
||||
hanlpMapResults = hanlpMapResults.stream().map(parseResult -> {
|
||||
parseResult.setOffset(offset);
|
||||
parseResult.setSimilarity(mapperHelper.getSimilarity(detectSegment, parseResult.getName()));
|
||||
return parseResult;
|
||||
}).collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
// step5. take only one dimension or 10 metric/dimension value per rond.
|
||||
List<HanlpMapResult> dimensionMetrics = hanlpMapResults.stream()
|
||||
.filter(entry -> mapperHelper.existDimensionValues(entry.getNatures()))
|
||||
.collect(Collectors.toList())
|
||||
.stream()
|
||||
.limit(1)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Integer oneDetectionSize = optimizationConfig.getOneDetectionSize();
|
||||
List<HanlpMapResult> oneRoundResults = hanlpMapResults.stream().limit(oneDetectionSize)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (CollectionUtils.isNotEmpty(dimensionMetrics)) {
|
||||
oneRoundResults = dimensionMetrics;
|
||||
}
|
||||
// step6. select mapResul in one round
|
||||
selectResultInOneRound(existResults, oneRoundResults);
|
||||
}
|
||||
|
||||
public String getMapKey(HanlpMapResult a) {
|
||||
return a.getName() + Constants.UNDERLINE + String.join(Constants.UNDERLINE, a.getNatures());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.knowledge.dictionary.DatabaseMapResult;
|
||||
import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
|
||||
import com.tencent.supersonic.knowledge.utils.HanlpHelper;
|
||||
import com.tencent.supersonic.knowledge.utils.NatureHelper;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
/***
|
||||
* A mapper that recognizes schema elements with keyword.
|
||||
* It leverages two matching strategies: HanlpDictMatchStrategy and DatabaseMatchStrategy.
|
||||
*/
|
||||
@Slf4j
|
||||
public class KeywordMapper extends BaseMapper {
|
||||
|
||||
@Override
|
||||
public void doMap(QueryContext queryContext) {
|
||||
String queryText = queryContext.getRequest().getQueryText();
|
||||
//1.hanlpDict Match
|
||||
List<Term> terms = HanlpHelper.getTerms(queryText);
|
||||
HanlpDictMatchStrategy hanlpMatchStrategy = ContextUtils.getBean(HanlpDictMatchStrategy.class);
|
||||
|
||||
List<HanlpMapResult> hanlpMapResults = hanlpMatchStrategy.getMatches(queryContext, terms);
|
||||
convertHanlpMapResultToMapInfo(hanlpMapResults, queryContext.getMapInfo(), terms);
|
||||
|
||||
//2.database Match
|
||||
DatabaseMatchStrategy databaseMatchStrategy = ContextUtils.getBean(DatabaseMatchStrategy.class);
|
||||
|
||||
List<DatabaseMapResult> databaseResults = databaseMatchStrategy.getMatches(queryContext, terms);
|
||||
convertDatabaseMapResultToMapInfo(queryContext, databaseResults);
|
||||
}
|
||||
|
||||
private void convertHanlpMapResultToMapInfo(List<HanlpMapResult> mapResults, SchemaMapInfo schemaMap,
|
||||
List<Term> terms) {
|
||||
if (CollectionUtils.isEmpty(mapResults)) {
|
||||
return;
|
||||
}
|
||||
HanlpHelper.transLetterOriginal(mapResults);
|
||||
Map<String, Long> wordNatureToFrequency = terms.stream().collect(
|
||||
Collectors.toMap(entry -> entry.getWord() + entry.getNature(),
|
||||
term -> Long.valueOf(term.getFrequency()), (value1, value2) -> value2));
|
||||
|
||||
for (HanlpMapResult hanlpMapResult : mapResults) {
|
||||
for (String nature : hanlpMapResult.getNatures()) {
|
||||
Long modelId = NatureHelper.getModelId(nature);
|
||||
if (Objects.isNull(modelId)) {
|
||||
continue;
|
||||
}
|
||||
SchemaElementType elementType = NatureHelper.convertToElementType(nature);
|
||||
if (Objects.isNull(elementType)) {
|
||||
continue;
|
||||
}
|
||||
Long elementID = NatureHelper.getElementID(nature);
|
||||
SchemaElement element = getSchemaElement(modelId, elementType, elementID);
|
||||
if (element == null) {
|
||||
continue;
|
||||
}
|
||||
if (element.getType().equals(SchemaElementType.VALUE)) {
|
||||
element.setName(hanlpMapResult.getName());
|
||||
}
|
||||
Long frequency = wordNatureToFrequency.get(hanlpMapResult.getName() + nature);
|
||||
SchemaElementMatch schemaElementMatch = SchemaElementMatch.builder()
|
||||
.element(element)
|
||||
.frequency(frequency)
|
||||
.word(hanlpMapResult.getName())
|
||||
.similarity(hanlpMapResult.getSimilarity())
|
||||
.detectWord(hanlpMapResult.getDetectWord())
|
||||
.build();
|
||||
|
||||
addToSchemaMap(schemaMap, modelId, schemaElementMatch);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void convertDatabaseMapResultToMapInfo(QueryContext queryContext, List<DatabaseMapResult> mapResults) {
|
||||
MapperHelper mapperHelper = ContextUtils.getBean(MapperHelper.class);
|
||||
for (DatabaseMapResult match : mapResults) {
|
||||
SchemaElement schemaElement = match.getSchemaElement();
|
||||
Set<Long> regElementSet = getRegElementSet(queryContext.getMapInfo(), schemaElement);
|
||||
if (regElementSet.contains(schemaElement.getId())) {
|
||||
continue;
|
||||
}
|
||||
SchemaElementMatch schemaElementMatch = SchemaElementMatch.builder()
|
||||
.element(schemaElement)
|
||||
.word(schemaElement.getName())
|
||||
.detectWord(match.getDetectWord())
|
||||
.frequency(10000L)
|
||||
.similarity(mapperHelper.getSimilarity(match.getDetectWord(), schemaElement.getName()))
|
||||
.build();
|
||||
log.info("add to schema, elementMatch {}", schemaElementMatch);
|
||||
addToSchemaMap(queryContext.getMapInfo(), schemaElement.getModel(), schemaElementMatch);
|
||||
}
|
||||
}
|
||||
|
||||
private Set<Long> getRegElementSet(SchemaMapInfo schemaMap, SchemaElement schemaElement) {
|
||||
List<SchemaElementMatch> elements = schemaMap.getMatchedElements(schemaElement.getModel());
|
||||
if (CollectionUtils.isEmpty(elements)) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
return elements.stream()
|
||||
.filter(elementMatch ->
|
||||
SchemaElementType.METRIC.equals(elementMatch.getElement().getType())
|
||||
|| SchemaElementType.DIMENSION.equals(elementMatch.getElement().getType()))
|
||||
.map(elementMatch -> elementMatch.getElement().getId())
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
}
|
||||
@@ -1,11 +1,13 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.algorithm.EditDistance;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.chat.service.AgentService;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.knowledge.utils.NatureHelper;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -17,10 +19,6 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* Mapper helper
|
||||
*/
|
||||
|
||||
@Data
|
||||
@Service
|
||||
@Slf4j
|
||||
@@ -39,10 +37,13 @@ public class MapperHelper {
|
||||
return index;
|
||||
}
|
||||
|
||||
public Integer getStepOffset(List<Integer> termList, Integer index) {
|
||||
public Integer getStepOffset(List<Term> termList, Integer index) {
|
||||
List<Integer> offsetList = termList.stream().sorted(Comparator.comparing(Term::getOffset))
|
||||
.map(term -> term.getOffset()).collect(Collectors.toList());
|
||||
|
||||
for (int j = 0; j < termList.size() - 1; j++) {
|
||||
if (termList.get(j) <= index && termList.get(j + 1) > index) {
|
||||
return termList.get(j);
|
||||
if (offsetList.get(j) <= index && offsetList.get(j + 1) > index) {
|
||||
return offsetList.get(j);
|
||||
}
|
||||
}
|
||||
return index;
|
||||
@@ -88,7 +89,7 @@ public class MapperHelper {
|
||||
|
||||
AgentService agentService = ContextUtils.getBean(AgentService.class);
|
||||
|
||||
Set<Long> detectModelIds = agentService.getDslToolsModelIds(request.getAgentId(), null);
|
||||
Set<Long> detectModelIds = agentService.getModelIds(request.getAgentId(), null);
|
||||
//contains all
|
||||
if (agentService.containsAllModel(detectModelIds)) {
|
||||
if (Objects.nonNull(modelId) && modelId > 0) {
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.knowledge.dictionary.MapResult;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* match strategy
|
||||
* MatchStrategy encapsulates a concrete matching algorithm
|
||||
* executed during query or search process.
|
||||
*/
|
||||
public interface MatchStrategy {
|
||||
public interface MatchStrategy<T> {
|
||||
|
||||
Map<MatchText, List<MapResult>> match(QueryReq queryReq, List<Term> terms, Set<Long> detectModelId);
|
||||
Map<MatchText, List<T>> match(QueryContext queryContext, List<Term> terms, Set<Long> detectModelId);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.tencent.supersonic.chat.api.component.SchemaMapper;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaModelClusterMapInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.utils.ModelClusterBuilder;
|
||||
import com.tencent.supersonic.common.pojo.ModelCluster;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/***
|
||||
* ModelClusterMapper build a cluster from
|
||||
* connectable data models based on model-rela configuration
|
||||
* and generate SchemaModelClusterMapInfo
|
||||
*/
|
||||
public class ModelClusterMapper implements SchemaMapper {
|
||||
|
||||
@Override
|
||||
public void map(QueryContext queryContext) {
|
||||
SchemaService schemaService = ContextUtils.getBean(SchemaService.class);
|
||||
SemanticSchema semanticSchema = schemaService.getSemanticSchema();
|
||||
SchemaMapInfo schemaMapInfo = queryContext.getMapInfo();
|
||||
List<ModelCluster> modelClusters = buildModelClusterMatched(schemaMapInfo, semanticSchema);
|
||||
Map<String, List<SchemaElementMatch>> modelClusterElementMatches = new HashMap<>();
|
||||
for (ModelCluster modelCluster : modelClusters) {
|
||||
for (Long modelId : schemaMapInfo.getMatchedModels()) {
|
||||
if (modelCluster.getModelIds().contains(modelId)) {
|
||||
modelClusterElementMatches.computeIfAbsent(modelCluster.getKey(), k -> new ArrayList<>())
|
||||
.addAll(schemaMapInfo.getMatchedElements(modelId));
|
||||
}
|
||||
}
|
||||
}
|
||||
SchemaModelClusterMapInfo modelClusterMapInfo = new SchemaModelClusterMapInfo();
|
||||
modelClusterMapInfo.setModelElementMatches(modelClusterElementMatches);
|
||||
queryContext.setModelClusterMapInfo(modelClusterMapInfo);
|
||||
}
|
||||
|
||||
private List<ModelCluster> buildModelClusterMatched(SchemaMapInfo schemaMapInfo,
|
||||
SemanticSchema semanticSchema) {
|
||||
Set<Long> matchedModels = schemaMapInfo.getMatchedModels();
|
||||
List<ModelCluster> modelClusters = ModelClusterBuilder.buildModelClusters(semanticSchema);
|
||||
return modelClusters.stream().map(ModelCluster::getModelIds).peek(modelCluster -> {
|
||||
modelCluster.removeIf(model -> !matchedModels.contains(model));
|
||||
}).filter(modelCluster -> modelCluster.size() > 0).map(ModelCluster::build).collect(Collectors.toList());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -11,6 +11,7 @@ import com.tencent.supersonic.chat.api.pojo.request.QueryFilter;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryFilters;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.knowledge.dictionary.builder.BaseWordBuilder;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import java.util.List;
|
||||
@@ -19,8 +20,6 @@ import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
public class QueryFilterMapper implements SchemaMapper {
|
||||
|
||||
private Long frequency = 9999999L;
|
||||
private double similarity = 1.0;
|
||||
|
||||
@Override
|
||||
@@ -37,7 +36,7 @@ public class QueryFilterMapper implements SchemaMapper {
|
||||
schemaElementMatches = Lists.newArrayList();
|
||||
schemaMapInfo.setMatchedElements(modelId, schemaElementMatches);
|
||||
}
|
||||
addValueSchemaElementMatch(schemaElementMatches, queryReq.getQueryFilters());
|
||||
addValueSchemaElementMatch(queryContext, schemaElementMatches, queryReq.getQueryFilters());
|
||||
}
|
||||
|
||||
private void clearOtherSchemaElementMatch(Long modelId, SchemaMapInfo schemaMapInfo) {
|
||||
@@ -48,7 +47,8 @@ public class QueryFilterMapper implements SchemaMapper {
|
||||
}
|
||||
}
|
||||
|
||||
private List<SchemaElementMatch> addValueSchemaElementMatch(List<SchemaElementMatch> candidateElementMatches,
|
||||
private List<SchemaElementMatch> addValueSchemaElementMatch(QueryContext queryContext,
|
||||
List<SchemaElementMatch> candidateElementMatches,
|
||||
QueryFilters queryFilter) {
|
||||
if (queryFilter == null || CollectionUtils.isEmpty(queryFilter.getFilters())) {
|
||||
return candidateElementMatches;
|
||||
@@ -62,10 +62,11 @@ public class QueryFilterMapper implements SchemaMapper {
|
||||
.name(String.valueOf(filter.getValue()))
|
||||
.type(SchemaElementType.VALUE)
|
||||
.bizName(filter.getBizName())
|
||||
.model(queryContext.getRequest().getModelId())
|
||||
.build();
|
||||
SchemaElementMatch schemaElementMatch = SchemaElementMatch.builder()
|
||||
.element(element)
|
||||
.frequency(frequency)
|
||||
.frequency(BaseWordBuilder.DEFAULT_FREQUENCY)
|
||||
.word(String.valueOf(filter.getValue()))
|
||||
.similarity(similarity)
|
||||
.detectWord(Constants.EMPTY)
|
||||
|
||||
@@ -1,163 +0,0 @@
|
||||
package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.knowledge.dictionary.MapResult;
|
||||
import com.tencent.supersonic.knowledge.service.SearchService;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.commons.compress.utils.Lists;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* match strategy implement
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class QueryMatchStrategy implements MatchStrategy {
|
||||
|
||||
@Autowired
|
||||
private MapperHelper mapperHelper;
|
||||
|
||||
@Autowired
|
||||
private OptimizationConfig optimizationConfig;
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<MapResult>> match(QueryReq queryReq, List<Term> terms, Set<Long> detectModelIds) {
|
||||
String text = queryReq.getQueryText();
|
||||
if (Objects.isNull(terms) || StringUtils.isEmpty(text)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Map<Integer, Integer> regOffsetToLength = terms.stream().sorted(Comparator.comparing(Term::length))
|
||||
.collect(Collectors.toMap(Term::getOffset, term -> term.word.length(), (value1, value2) -> value2));
|
||||
|
||||
List<Integer> offsetList = terms.stream().sorted(Comparator.comparing(Term::getOffset))
|
||||
.map(term -> term.getOffset()).collect(Collectors.toList());
|
||||
|
||||
log.debug("retryCount:{},terms:{},regOffsetToLength:{},offsetList:{},detectModelIds:{}", terms,
|
||||
regOffsetToLength, offsetList, detectModelIds);
|
||||
|
||||
List<MapResult> detects = detect(queryReq, regOffsetToLength, offsetList, detectModelIds);
|
||||
Map<MatchText, List<MapResult>> result = new HashMap<>();
|
||||
|
||||
result.put(MatchText.builder().regText(text).detectSegment(text).build(), detects);
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<MapResult> detect(QueryReq queryReq, Map<Integer, Integer> regOffsetToLength, List<Integer> offsetList,
|
||||
Set<Long> detectModelIds) {
|
||||
String text = queryReq.getQueryText();
|
||||
List<MapResult> results = Lists.newArrayList();
|
||||
|
||||
for (Integer index = 0; index <= text.length() - 1; ) {
|
||||
|
||||
Set<MapResult> mapResultRowSet = new LinkedHashSet();
|
||||
|
||||
for (Integer i = index; i <= text.length(); ) {
|
||||
int offset = mapperHelper.getStepOffset(offsetList, index);
|
||||
i = mapperHelper.getStepIndex(regOffsetToLength, i);
|
||||
if (i <= text.length()) {
|
||||
List<MapResult> mapResults = detectByStep(queryReq, detectModelIds, index, i, offset);
|
||||
selectMapResultInOneRound(mapResultRowSet, mapResults);
|
||||
}
|
||||
}
|
||||
index = mapperHelper.getStepIndex(regOffsetToLength, index);
|
||||
results.addAll(mapResultRowSet);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
private void selectMapResultInOneRound(Set<MapResult> mapResultRowSet, List<MapResult> mapResults) {
|
||||
for (MapResult mapResult : mapResults) {
|
||||
if (mapResultRowSet.contains(mapResult)) {
|
||||
boolean isDeleted = mapResultRowSet.removeIf(
|
||||
entry -> {
|
||||
boolean deleted = getMapKey(mapResult).equals(getMapKey(entry))
|
||||
&& entry.getDetectWord().length() < mapResult.getDetectWord().length();
|
||||
if (deleted) {
|
||||
log.info("deleted entry:{}", entry);
|
||||
}
|
||||
return deleted;
|
||||
}
|
||||
);
|
||||
if (isDeleted) {
|
||||
log.info("deleted, add mapResult:{}", mapResult);
|
||||
mapResultRowSet.add(mapResult);
|
||||
}
|
||||
} else {
|
||||
mapResultRowSet.add(mapResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String getMapKey(MapResult a) {
|
||||
return a.getName() + Constants.UNDERLINE + String.join(Constants.UNDERLINE, a.getNatures());
|
||||
}
|
||||
|
||||
private List<MapResult> detectByStep(QueryReq queryReq, Set<Long> detectModelIds, Integer index, Integer i,
|
||||
int offset) {
|
||||
String text = queryReq.getQueryText();
|
||||
Integer agentId = queryReq.getAgentId();
|
||||
String detectSegment = text.substring(index, i);
|
||||
|
||||
// step1. pre search
|
||||
Integer oneDetectionMaxSize = optimizationConfig.getOneDetectionMaxSize();
|
||||
LinkedHashSet<MapResult> mapResults = SearchService.prefixSearch(detectSegment, oneDetectionMaxSize, agentId,
|
||||
detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
// step2. suffix search
|
||||
LinkedHashSet<MapResult> suffixMapResults = SearchService.suffixSearch(detectSegment, oneDetectionMaxSize,
|
||||
agentId, detectModelIds).stream().collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
mapResults.addAll(suffixMapResults);
|
||||
|
||||
if (CollectionUtils.isEmpty(mapResults)) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
// step3. merge pre/suffix result
|
||||
mapResults = mapResults.stream().sorted((a, b) -> -(b.getName().length() - a.getName().length()))
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
// step4. filter by similarity
|
||||
mapResults = mapResults.stream()
|
||||
.filter(term -> mapperHelper.getSimilarity(detectSegment, term.getName())
|
||||
>= mapperHelper.getThresholdMatch(term.getNatures()))
|
||||
.filter(term -> CollectionUtils.isNotEmpty(term.getNatures()))
|
||||
.collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
log.info("after isSimilarity parseResults:{}", mapResults);
|
||||
|
||||
mapResults = mapResults.stream().map(parseResult -> {
|
||||
parseResult.setOffset(offset);
|
||||
parseResult.setSimilarity(mapperHelper.getSimilarity(detectSegment, parseResult.getName()));
|
||||
return parseResult;
|
||||
}).collect(Collectors.toCollection(LinkedHashSet::new));
|
||||
|
||||
// step5. take only one dimension or 10 metric/dimension value per rond.
|
||||
List<MapResult> dimensionMetrics = mapResults.stream()
|
||||
.filter(entry -> mapperHelper.existDimensionValues(entry.getNatures()))
|
||||
.collect(Collectors.toList())
|
||||
.stream()
|
||||
.limit(1)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (CollectionUtils.isNotEmpty(dimensionMetrics)) {
|
||||
return dimensionMetrics;
|
||||
} else {
|
||||
return mapResults.stream().limit(optimizationConfig.getOneDetectionSize()).collect(Collectors.toList());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,9 +2,10 @@ package com.tencent.supersonic.chat.mapper;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.hankcs.hanlp.seg.common.Term;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.common.pojo.enums.DictWordType;
|
||||
import com.tencent.supersonic.knowledge.dictionary.MapResult;
|
||||
import com.tencent.supersonic.knowledge.dictionary.HanlpMapResult;
|
||||
import com.tencent.supersonic.knowledge.service.SearchService;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -17,20 +18,20 @@ import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* match strategy implement
|
||||
* SearchMatchStrategy encapsulates a concrete matching algorithm
|
||||
* executed during search process.
|
||||
*/
|
||||
@Service
|
||||
public class SearchMatchStrategy implements MatchStrategy {
|
||||
public class SearchMatchStrategy extends BaseMatchStrategy<HanlpMapResult> {
|
||||
|
||||
private static final int SEARCH_SIZE = 3;
|
||||
|
||||
@Override
|
||||
public Map<MatchText, List<MapResult>> match(QueryReq queryReq, List<Term> originals, Set<Long> detectModelIds) {
|
||||
public Map<MatchText, List<HanlpMapResult>> match(QueryContext queryContext, List<Term> originals,
|
||||
Set<Long> detectModelIds) {
|
||||
QueryReq queryReq = queryContext.getRequest();
|
||||
String text = queryReq.getQueryText();
|
||||
Map<Integer, Integer> regOffsetToLength = originals.stream()
|
||||
.filter(entry -> !entry.nature.toString().startsWith(DictWordType.NATURE_SPILT))
|
||||
.collect(Collectors.toMap(Term::getOffset, value -> value.word.length(),
|
||||
(value1, value2) -> value2));
|
||||
Map<Integer, Integer> regOffsetToLength = getRegOffsetToLength(originals);
|
||||
|
||||
List<Integer> detectIndexList = Lists.newArrayList();
|
||||
|
||||
@@ -46,19 +47,19 @@ public class SearchMatchStrategy implements MatchStrategy {
|
||||
index++;
|
||||
}
|
||||
}
|
||||
Map<MatchText, List<MapResult>> regTextMap = new ConcurrentHashMap<>();
|
||||
Map<MatchText, List<HanlpMapResult>> regTextMap = new ConcurrentHashMap<>();
|
||||
detectIndexList.stream().parallel().forEach(detectIndex -> {
|
||||
String regText = text.substring(0, detectIndex);
|
||||
String detectSegment = text.substring(detectIndex);
|
||||
|
||||
if (StringUtils.isNotEmpty(detectSegment)) {
|
||||
List<MapResult> mapResults = SearchService.prefixSearch(detectSegment,
|
||||
List<HanlpMapResult> hanlpMapResults = SearchService.prefixSearch(detectSegment,
|
||||
SearchService.SEARCH_SIZE, queryReq.getAgentId(), detectModelIds);
|
||||
List<MapResult> suffixMapResults = SearchService.suffixSearch(detectSegment, SEARCH_SIZE,
|
||||
queryReq.getAgentId(), detectModelIds);
|
||||
mapResults.addAll(suffixMapResults);
|
||||
List<HanlpMapResult> suffixHanlpMapResults = SearchService.suffixSearch(
|
||||
detectSegment, SEARCH_SIZE, queryReq.getAgentId(), detectModelIds);
|
||||
hanlpMapResults.addAll(suffixHanlpMapResults);
|
||||
// remove entity name where search
|
||||
mapResults = mapResults.stream().filter(entry -> {
|
||||
hanlpMapResults = hanlpMapResults.stream().filter(entry -> {
|
||||
List<String> natures = entry.getNatures().stream()
|
||||
.filter(nature -> !nature.endsWith(DictWordType.ENTITY.getType()))
|
||||
.collect(Collectors.toList());
|
||||
@@ -71,10 +72,27 @@ public class SearchMatchStrategy implements MatchStrategy {
|
||||
.regText(regText)
|
||||
.detectSegment(detectSegment)
|
||||
.build();
|
||||
regTextMap.put(matchText, mapResults);
|
||||
regTextMap.put(matchText, hanlpMapResults);
|
||||
}
|
||||
}
|
||||
);
|
||||
return regTextMap;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean needDelete(HanlpMapResult oneRoundResult, HanlpMapResult existResult) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMapKey(HanlpMapResult a) {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void detectByStep(QueryContext queryContext, Set<HanlpMapResult> results, Set<Long> detectModelIds,
|
||||
Integer startIndex,
|
||||
Integer i, int offset) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
package com.tencent.supersonic.chat.parser;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionPromptGenerator;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp;
|
||||
import com.tencent.supersonic.chat.parser.sql.llm.OutputFormat;
|
||||
import com.tencent.supersonic.chat.parser.sql.llm.SqlGeneration;
|
||||
import com.tencent.supersonic.chat.parser.sql.llm.SqlGenerationFactory;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq.SqlGenerationMode;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import dev.langchain4j.model.chat.ChatLanguageModel;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* LLMProxy based on langchain4j Java version.
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class JavaLLMProxy implements LLMProxy {
|
||||
|
||||
@Override
|
||||
public boolean isSkip(QueryContext queryContext) {
|
||||
ChatLanguageModel chatLanguageModel = ContextUtils.getBean(ChatLanguageModel.class);
|
||||
if (Objects.isNull(chatLanguageModel)) {
|
||||
log.warn("chatLanguageModel is null, skip :{}", JavaLLMProxy.class.getName());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public LLMResp query2sql(LLMReq llmReq, String modelClusterKey) {
|
||||
|
||||
SqlGeneration sqlGeneration = SqlGenerationFactory.get(
|
||||
SqlGenerationMode.getMode(llmReq.getSqlGenerationMode()));
|
||||
String modelName = llmReq.getSchema().getModelName();
|
||||
Map<String, Double> sqlWeight = sqlGeneration.generation(llmReq, modelClusterKey);
|
||||
|
||||
LLMResp result = new LLMResp();
|
||||
result.setQuery(llmReq.getQueryText());
|
||||
result.setModelName(modelName);
|
||||
result.setSqlWeight(sqlWeight);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionResp requestFunction(FunctionReq functionReq) {
|
||||
|
||||
FunctionPromptGenerator promptGenerator = ContextUtils.getBean(FunctionPromptGenerator.class);
|
||||
|
||||
String functionCallPrompt = promptGenerator.generateFunctionCallPrompt(functionReq.getQueryText(),
|
||||
functionReq.getPluginConfigs());
|
||||
|
||||
ChatLanguageModel chatLanguageModel = ContextUtils.getBean(ChatLanguageModel.class);
|
||||
|
||||
String functionSelect = chatLanguageModel.generate(functionCallPrompt);
|
||||
|
||||
return OutputFormat.functionCallParse(functionSelect);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
package com.tencent.supersonic.chat.parser;
|
||||
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
|
||||
/**
|
||||
* LLMProxy encapsulates functions performed by LLMs so that multiple
|
||||
* orchestration frameworks (e.g. LangChain in python, LangChain4j in java)
|
||||
* could be used.
|
||||
*/
|
||||
public interface LLMProxy {
|
||||
|
||||
boolean isSkip(QueryContext queryContext);
|
||||
|
||||
LLMResp query2sql(LLMReq llmReq, String modelClusterKey);
|
||||
|
||||
FunctionResp requestFunction(FunctionReq functionReq);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
package com.tencent.supersonic.chat.parser;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.config.LLMParserConfig;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionCallConfig;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionReq;
|
||||
import com.tencent.supersonic.chat.parser.plugin.function.FunctionResp;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMResp;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import org.springframework.web.util.UriComponentsBuilder;
|
||||
|
||||
/**
|
||||
* PythonLLMProxy sends requests to LangChain-based python service.
|
||||
*/
|
||||
@Slf4j
|
||||
@Component
|
||||
public class PythonLLMProxy implements LLMProxy {
|
||||
|
||||
@Override
|
||||
public boolean isSkip(QueryContext queryContext) {
|
||||
LLMParserConfig llmParserConfig = ContextUtils.getBean(LLMParserConfig.class);
|
||||
if (StringUtils.isEmpty(llmParserConfig.getUrl())) {
|
||||
log.warn("llmParserUrl is empty, skip :{}", PythonLLMProxy.class.getName());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public LLMResp query2sql(LLMReq llmReq, String modelClusterKey) {
|
||||
|
||||
long startTime = System.currentTimeMillis();
|
||||
log.info("requestLLM request, modelId:{},llmReq:{}", modelClusterKey, llmReq);
|
||||
try {
|
||||
LLMParserConfig llmParserConfig = ContextUtils.getBean(LLMParserConfig.class);
|
||||
|
||||
URL url = new URL(new URL(llmParserConfig.getUrl()), llmParserConfig.getQueryToSqlPath());
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
HttpEntity<String> entity = new HttpEntity<>(JsonUtil.toString(llmReq), headers);
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
ResponseEntity<LLMResp> responseEntity = restTemplate.exchange(url.toString(), HttpMethod.POST, entity,
|
||||
LLMResp.class);
|
||||
|
||||
log.info("requestLLM response,cost:{}, questUrl:{} \n entity:{} \n body:{}",
|
||||
System.currentTimeMillis() - startTime, url, entity, responseEntity.getBody());
|
||||
return responseEntity.getBody();
|
||||
} catch (Exception e) {
|
||||
log.error("requestLLM error", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public FunctionResp requestFunction(FunctionReq functionReq) {
|
||||
FunctionCallConfig functionCallInfoConfig = ContextUtils.getBean(FunctionCallConfig.class);
|
||||
String url = functionCallInfoConfig.getUrl() + functionCallInfoConfig.getPluginSelectPath();
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
long startTime = System.currentTimeMillis();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
HttpEntity<String> entity = new HttpEntity<>(JSON.toJSONString(functionReq), headers);
|
||||
URI requestUrl = UriComponentsBuilder.fromHttpUrl(url).build().encode().toUri();
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
try {
|
||||
log.info("requestFunction functionReq:{}", JsonUtil.toString(functionReq));
|
||||
ResponseEntity<FunctionResp> responseEntity = restTemplate.exchange(requestUrl, HttpMethod.POST, entity,
|
||||
FunctionResp.class);
|
||||
log.info("requestFunction responseEntity:{},cost:{}", responseEntity,
|
||||
System.currentTimeMillis() - startTime);
|
||||
return responseEntity.getBody();
|
||||
} catch (Exception e) {
|
||||
log.error("requestFunction error", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
package com.tencent.supersonic.chat.parser;
|
||||
|
||||
import com.tencent.supersonic.auth.api.authentication.pojo.User;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticParser;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticQuery;
|
||||
import com.tencent.supersonic.chat.api.pojo.ChatContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.response.SqlInfo;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMSqlQuery;
|
||||
import com.tencent.supersonic.chat.query.rule.RuleSemanticQuery;
|
||||
import com.tencent.supersonic.chat.service.SemanticService;
|
||||
import com.tencent.supersonic.common.pojo.enums.QueryType;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections4.CollectionUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* QueryTypeParser resolves query type as either METRIC or TAG, or ID.
|
||||
*/
|
||||
@Slf4j
|
||||
public class QueryTypeParser implements SemanticParser {
|
||||
|
||||
@Override
|
||||
public void parse(QueryContext queryContext, ChatContext chatContext) {
|
||||
|
||||
List<SemanticQuery> candidateQueries = queryContext.getCandidateQueries();
|
||||
User user = queryContext.getRequest().getUser();
|
||||
|
||||
for (SemanticQuery semanticQuery : candidateQueries) {
|
||||
// 1.init S2SQL
|
||||
semanticQuery.initS2Sql(user);
|
||||
// 2.set queryType
|
||||
QueryType queryType = getQueryType(semanticQuery);
|
||||
semanticQuery.getParseInfo().setQueryType(queryType);
|
||||
}
|
||||
}
|
||||
|
||||
private QueryType getQueryType(SemanticQuery semanticQuery) {
|
||||
SemanticParseInfo parseInfo = semanticQuery.getParseInfo();
|
||||
SqlInfo sqlInfo = parseInfo.getSqlInfo();
|
||||
if (Objects.isNull(sqlInfo) || StringUtils.isBlank(sqlInfo.getS2SQL())) {
|
||||
return QueryType.ID;
|
||||
}
|
||||
//1. entity queryType
|
||||
Set<Long> modelIds = parseInfo.getModel().getModelIds();
|
||||
if (semanticQuery instanceof RuleSemanticQuery || semanticQuery instanceof LLMSqlQuery) {
|
||||
//If all the fields in the SELECT statement are of tag type.
|
||||
List<String> selectFields = SqlParserSelectHelper.getSelectFields(sqlInfo.getS2SQL());
|
||||
SemanticService semanticService = ContextUtils.getBean(SemanticService.class);
|
||||
SemanticSchema semanticSchema = semanticService.getSemanticSchema();
|
||||
if (CollectionUtils.isNotEmpty(selectFields)) {
|
||||
Set<String> tags = semanticSchema.getTags(modelIds).stream().map(SchemaElement::getName)
|
||||
.collect(Collectors.toSet());
|
||||
if (CollectionUtils.isNotEmpty(tags) && tags.containsAll(selectFields)) {
|
||||
return QueryType.TAG;
|
||||
}
|
||||
}
|
||||
}
|
||||
//2. metric queryType
|
||||
List<String> selectFields = SqlParserSelectHelper.getSelectFields(sqlInfo.getS2SQL());
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
List<SchemaElement> metrics = semanticSchema.getMetrics(modelIds);
|
||||
if (CollectionUtils.isNotEmpty(metrics)) {
|
||||
Set<String> metricNameSet = metrics.stream().map(SchemaElement::getName).collect(Collectors.toSet());
|
||||
boolean containMetric = selectFields.stream().anyMatch(metricNameSet::contains);
|
||||
if (containMetric) {
|
||||
return QueryType.METRIC;
|
||||
}
|
||||
}
|
||||
return QueryType.ID;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -5,7 +5,7 @@ import com.tencent.supersonic.chat.api.component.SemanticQuery;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.config.OptimizationConfig;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.DslQuery;
|
||||
import com.tencent.supersonic.chat.query.llm.s2sql.LLMSqlQuery;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@@ -18,9 +18,9 @@ import lombok.extern.slf4j.Slf4j;
|
||||
public class SatisfactionChecker {
|
||||
|
||||
// check all the parse info in candidate
|
||||
public static boolean check(QueryContext queryContext) {
|
||||
public static boolean isSkip(QueryContext queryContext) {
|
||||
for (SemanticQuery query : queryContext.getCandidateQueries()) {
|
||||
if (query.getQueryMode().equals(DslQuery.QUERY_MODE)) {
|
||||
if (query.getQueryMode().equals(LLMSqlQuery.QUERY_MODE)) {
|
||||
continue;
|
||||
}
|
||||
if (checkThreshold(queryContext.getRequest().getQueryText(), query.getParseInfo())) {
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
package com.tencent.supersonic.chat.parser.llm.dsl;
|
||||
|
||||
import com.tencent.supersonic.chat.agent.tool.DslTool;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.LLMResp;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class DSLParseResult {
|
||||
|
||||
private LLMReq llmReq;
|
||||
|
||||
private LLMResp llmResp;
|
||||
|
||||
private QueryReq request;
|
||||
|
||||
private DslTool dslTool;
|
||||
}
|
||||
@@ -1,231 +0,0 @@
|
||||
package com.tencent.supersonic.chat.parser.llm.dsl;
|
||||
|
||||
import com.tencent.supersonic.chat.api.component.SemanticQuery;
|
||||
import com.tencent.supersonic.chat.api.pojo.ChatContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaMapInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
|
||||
@Slf4j
|
||||
public class HeuristicModelResolver implements ModelResolver {
|
||||
|
||||
protected static Long selectModelBySchemaElementCount(Map<Long, SemanticQuery> modelQueryModes,
|
||||
SchemaMapInfo schemaMap) {
|
||||
//model count priority
|
||||
Long modelIdByModelCount = getModelIdByModelCount(schemaMap);
|
||||
if (Objects.nonNull(modelIdByModelCount)) {
|
||||
log.info("selectModel by model count:{}", modelIdByModelCount);
|
||||
return modelIdByModelCount;
|
||||
}
|
||||
|
||||
Map<Long, ModelMatchResult> modelTypeMap = getModelTypeMap(schemaMap);
|
||||
if (modelTypeMap.size() == 1) {
|
||||
Long modelSelect = modelTypeMap.entrySet().stream().collect(Collectors.toList()).get(0).getKey();
|
||||
if (modelQueryModes.containsKey(modelSelect)) {
|
||||
log.info("selectModel with only one Model [{}]", modelSelect);
|
||||
return modelSelect;
|
||||
}
|
||||
} else {
|
||||
|
||||
Map.Entry<Long, ModelMatchResult> maxModel = modelTypeMap.entrySet().stream()
|
||||
.filter(entry -> modelQueryModes.containsKey(entry.getKey()))
|
||||
.sorted((o1, o2) -> {
|
||||
int difference = o2.getValue().getCount() - o1.getValue().getCount();
|
||||
if (difference == 0) {
|
||||
return (int) ((o2.getValue().getMaxSimilarity()
|
||||
- o1.getValue().getMaxSimilarity()) * 100);
|
||||
}
|
||||
return difference;
|
||||
}).findFirst().orElse(null);
|
||||
if (maxModel != null) {
|
||||
log.info("selectModel with multiple Models [{}]", maxModel.getKey());
|
||||
return maxModel.getKey();
|
||||
}
|
||||
}
|
||||
return 0L;
|
||||
}
|
||||
|
||||
private static Long getModelIdByModelCount(SchemaMapInfo schemaMap) {
|
||||
Map<Long, List<SchemaElementMatch>> modelElementMatches = schemaMap.getModelElementMatches();
|
||||
Map<Long, Integer> modelIdToModelCount = new HashMap<>();
|
||||
if (Objects.nonNull(modelElementMatches)) {
|
||||
for (Entry<Long, List<SchemaElementMatch>> modelElementMatch : modelElementMatches.entrySet()) {
|
||||
Long modelId = modelElementMatch.getKey();
|
||||
List<SchemaElementMatch> modelMatches = modelElementMatch.getValue().stream().filter(
|
||||
elementMatch -> SchemaElementType.MODEL.equals(elementMatch.getElement().getType())
|
||||
).collect(Collectors.toList());
|
||||
|
||||
if (!CollectionUtils.isEmpty(modelMatches)) {
|
||||
Integer count = modelMatches.size();
|
||||
modelIdToModelCount.put(modelId, count);
|
||||
}
|
||||
}
|
||||
Entry<Long, Integer> maxModelCount = modelIdToModelCount.entrySet().stream()
|
||||
.max(Comparator.comparingInt(o -> o.getValue())).orElse(null);
|
||||
log.info("maxModelCount:{},modelIdToModelCount:{}", maxModelCount, modelIdToModelCount);
|
||||
if (Objects.nonNull(maxModelCount)) {
|
||||
return maxModelCount.getKey();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* to check can switch Model if context exit Model
|
||||
*
|
||||
* @return false will use context Model, true will use other Model , maybe include context Model
|
||||
*/
|
||||
protected static boolean isAllowSwitch(Map<Long, SemanticQuery> modelQueryModes, SchemaMapInfo schemaMap,
|
||||
ChatContext chatCtx, QueryReq searchCtx,
|
||||
Long modelId, Set<Long> restrictiveModels) {
|
||||
if (!Objects.nonNull(modelId) || modelId <= 0) {
|
||||
return true;
|
||||
}
|
||||
// except content Model, calculate the number of types for each Model, if numbers<=1 will not switch
|
||||
Map<Long, ModelMatchResult> modelTypeMap = getModelTypeMap(schemaMap);
|
||||
log.info("isAllowSwitch ModelTypeMap [{}]", modelTypeMap);
|
||||
long otherModelTypeNumBigOneCount = modelTypeMap.entrySet().stream()
|
||||
.filter(entry -> modelQueryModes.containsKey(entry.getKey()) && !entry.getKey().equals(modelId))
|
||||
.filter(entry -> entry.getValue().getCount() > 1).count();
|
||||
if (otherModelTypeNumBigOneCount >= 1) {
|
||||
return true;
|
||||
}
|
||||
// if query text only contain time , will not switch
|
||||
if (!CollectionUtils.isEmpty(modelQueryModes.values())) {
|
||||
for (SemanticQuery semanticQuery : modelQueryModes.values()) {
|
||||
if (semanticQuery == null) {
|
||||
continue;
|
||||
}
|
||||
SemanticParseInfo semanticParseInfo = semanticQuery.getParseInfo();
|
||||
if (semanticParseInfo == null) {
|
||||
continue;
|
||||
}
|
||||
if (searchCtx.getQueryText() != null && semanticParseInfo.getDateInfo() != null) {
|
||||
if (semanticParseInfo.getDateInfo().getDetectWord() != null) {
|
||||
if (semanticParseInfo.getDateInfo().getDetectWord()
|
||||
.equalsIgnoreCase(searchCtx.getQueryText())) {
|
||||
log.info("timeParseResults is not null , can not switch context , timeParseResults:{},",
|
||||
semanticParseInfo.getDateInfo());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (CollectionUtils.isNotEmpty(restrictiveModels) && !restrictiveModels.contains(modelId)) {
|
||||
return true;
|
||||
}
|
||||
// if context Model not in schemaMap , will switch
|
||||
if (schemaMap.getMatchedElements(modelId) == null || schemaMap.getMatchedElements(modelId).size() <= 0) {
|
||||
log.info("modelId not in schemaMap ");
|
||||
return true;
|
||||
}
|
||||
// other will not switch
|
||||
return false;
|
||||
}
|
||||
|
||||
public static Map<Long, ModelMatchResult> getModelTypeMap(SchemaMapInfo schemaMap) {
|
||||
Map<Long, ModelMatchResult> modelCount = new HashMap<>();
|
||||
for (Map.Entry<Long, List<SchemaElementMatch>> entry : schemaMap.getModelElementMatches().entrySet()) {
|
||||
List<SchemaElementMatch> schemaElementMatches = schemaMap.getMatchedElements(entry.getKey());
|
||||
if (schemaElementMatches != null && schemaElementMatches.size() > 0) {
|
||||
if (!modelCount.containsKey(entry.getKey())) {
|
||||
modelCount.put(entry.getKey(), new ModelMatchResult());
|
||||
}
|
||||
ModelMatchResult modelMatchResult = modelCount.get(entry.getKey());
|
||||
Set<SchemaElementType> schemaElementTypes = new HashSet<>();
|
||||
schemaElementMatches.stream()
|
||||
.forEach(schemaElementMatch -> schemaElementTypes.add(
|
||||
schemaElementMatch.getElement().getType()));
|
||||
SchemaElementMatch schemaElementMatchMax = schemaElementMatches.stream()
|
||||
.sorted((o1, o2) ->
|
||||
((int) ((o2.getSimilarity() - o1.getSimilarity()) * 100))
|
||||
).findFirst().orElse(null);
|
||||
if (schemaElementMatchMax != null) {
|
||||
modelMatchResult.setMaxSimilarity(schemaElementMatchMax.getSimilarity());
|
||||
}
|
||||
modelMatchResult.setCount(schemaElementTypes.size());
|
||||
|
||||
}
|
||||
}
|
||||
return modelCount;
|
||||
}
|
||||
|
||||
|
||||
public Long resolve(QueryContext queryContext, ChatContext chatCtx, Set<Long> restrictiveModels) {
|
||||
Long modelId = queryContext.getRequest().getModelId();
|
||||
if (Objects.nonNull(modelId) && modelId > 0) {
|
||||
if (CollectionUtils.isEmpty(restrictiveModels)) {
|
||||
return modelId;
|
||||
}
|
||||
if (restrictiveModels.contains(modelId)) {
|
||||
return modelId;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
SchemaMapInfo mapInfo = queryContext.getMapInfo();
|
||||
Set<Long> matchedModels = mapInfo.getMatchedModels();
|
||||
if (CollectionUtils.isNotEmpty(restrictiveModels)) {
|
||||
matchedModels = matchedModels.stream()
|
||||
.filter(restrictiveModels::contains)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
Map<Long, SemanticQuery> modelQueryModes = new HashMap<>();
|
||||
for (Long matchedModel : matchedModels) {
|
||||
modelQueryModes.put(matchedModel, null);
|
||||
}
|
||||
if (modelQueryModes.size() == 1) {
|
||||
return modelQueryModes.keySet().stream().findFirst().get();
|
||||
}
|
||||
return resolve(modelQueryModes, queryContext, chatCtx,
|
||||
queryContext.getMapInfo(), restrictiveModels);
|
||||
}
|
||||
|
||||
public Long resolve(Map<Long, SemanticQuery> modelQueryModes, QueryContext queryContext,
|
||||
ChatContext chatCtx, SchemaMapInfo schemaMap, Set<Long> restrictiveModels) {
|
||||
Long selectModel = selectModel(modelQueryModes, queryContext.getRequest(),
|
||||
chatCtx, schemaMap, restrictiveModels);
|
||||
if (selectModel > 0) {
|
||||
log.info("selectModel {} ", selectModel);
|
||||
return selectModel;
|
||||
}
|
||||
// get the max SchemaElementType number
|
||||
return selectModelBySchemaElementCount(modelQueryModes, schemaMap);
|
||||
}
|
||||
|
||||
public Long selectModel(Map<Long, SemanticQuery> modelQueryModes, QueryReq queryContext,
|
||||
ChatContext chatCtx,
|
||||
SchemaMapInfo schemaMap, Set<Long> restrictiveModels) {
|
||||
// if QueryContext has modelId and in ModelQueryModes
|
||||
if (modelQueryModes.containsKey(queryContext.getModelId())) {
|
||||
log.info("selectModel from QueryContext [{}]", queryContext.getModelId());
|
||||
return queryContext.getModelId();
|
||||
}
|
||||
// if ChatContext has modelId and in ModelQueryModes
|
||||
if (chatCtx.getParseInfo().getModelId() > 0) {
|
||||
Long modelId = chatCtx.getParseInfo().getModelId();
|
||||
if (!isAllowSwitch(modelQueryModes, schemaMap, chatCtx, queryContext, modelId, restrictiveModels)) {
|
||||
log.info("selectModel from ChatContext [{}]", modelId);
|
||||
return modelId;
|
||||
}
|
||||
}
|
||||
// default 0
|
||||
return 0L;
|
||||
}
|
||||
}
|
||||
@@ -1,460 +0,0 @@
|
||||
package com.tencent.supersonic.chat.parser.llm.dsl;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.tencent.supersonic.chat.agent.tool.AgentToolType;
|
||||
import com.tencent.supersonic.chat.agent.tool.DslTool;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticCorrector;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticParser;
|
||||
import com.tencent.supersonic.chat.api.pojo.ChatContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticCorrectInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryFilter;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.config.LLMParserConfig;
|
||||
import com.tencent.supersonic.chat.parser.SatisfactionChecker;
|
||||
import com.tencent.supersonic.chat.query.QueryManager;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.DslQuery;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.LLMReq;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.LLMReq.ElementValue;
|
||||
import com.tencent.supersonic.chat.query.llm.dsl.LLMResp;
|
||||
import com.tencent.supersonic.chat.query.plugin.PluginSemanticQuery;
|
||||
import com.tencent.supersonic.chat.service.AgentService;
|
||||
import com.tencent.supersonic.chat.utils.ComponentFactory;
|
||||
import com.tencent.supersonic.common.pojo.Constants;
|
||||
import com.tencent.supersonic.common.pojo.DateConf;
|
||||
import com.tencent.supersonic.common.pojo.DateConf.DateMode;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.common.util.DateUtils;
|
||||
import com.tencent.supersonic.common.util.JsonUtil;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.FilterExpression;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectFunctionHelper;
|
||||
import com.tencent.supersonic.common.util.jsqlparser.SqlParserSelectHelper;
|
||||
import com.tencent.supersonic.knowledge.service.SchemaService;
|
||||
import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
@Slf4j
|
||||
public class LLMDslParser implements SemanticParser {
|
||||
|
||||
@Override
|
||||
public void parse(QueryContext queryCtx, ChatContext chatCtx) {
|
||||
QueryReq request = queryCtx.getRequest();
|
||||
LLMParserConfig llmParserConfig = ContextUtils.getBean(LLMParserConfig.class);
|
||||
if (StringUtils.isEmpty(llmParserConfig.getUrl())) {
|
||||
log.info("llm parser url is empty, skip dsl parser, llmParserConfig:{}", llmParserConfig);
|
||||
return;
|
||||
}
|
||||
if (SatisfactionChecker.check(queryCtx)) {
|
||||
log.info("skip dsl parser, queryText:{}", request.getQueryText());
|
||||
return;
|
||||
}
|
||||
try {
|
||||
Long modelId = getModelId(queryCtx, chatCtx, request.getAgentId());
|
||||
if (Objects.isNull(modelId) || modelId <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
DslTool dslTool = getDslTool(request, modelId);
|
||||
if (Objects.isNull(dslTool)) {
|
||||
log.info("no dsl tool in this agent, skip dsl parser");
|
||||
return;
|
||||
}
|
||||
|
||||
LLMReq llmReq = getLlmReq(queryCtx, modelId, llmParserConfig);
|
||||
LLMResp llmResp = requestLLM(llmReq, modelId, llmParserConfig);
|
||||
|
||||
if (Objects.isNull(llmResp)) {
|
||||
return;
|
||||
}
|
||||
DSLParseResult dslParseResult = DSLParseResult.builder().request(request)
|
||||
.dslTool(dslTool).llmReq(llmReq).llmResp(llmResp).build();
|
||||
|
||||
SemanticParseInfo parseInfo = getParseInfo(queryCtx, modelId, dslTool, dslParseResult);
|
||||
|
||||
SemanticCorrectInfo semanticCorrectInfo = getCorrectorSql(queryCtx, parseInfo, llmResp.getSqlOutput());
|
||||
|
||||
llmResp.setCorrectorSql(semanticCorrectInfo.getSql());
|
||||
|
||||
updateParseInfo(semanticCorrectInfo, modelId, parseInfo);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("LLMDSLParser error", e);
|
||||
}
|
||||
}
|
||||
|
||||
private Set<SchemaElement> getElements(Long modelId, List<String> allFields, List<SchemaElement> elements) {
|
||||
return elements.stream()
|
||||
.filter(schemaElement -> modelId.equals(schemaElement.getModel())
|
||||
&& allFields.contains(schemaElement.getName())
|
||||
).collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
private List<String> getFieldsExceptDate(List<String> allFields) {
|
||||
if (CollectionUtils.isEmpty(allFields)) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
return allFields.stream()
|
||||
.filter(entry -> !DateUtils.DATE_FIELD.equalsIgnoreCase(entry))
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
public void updateParseInfo(SemanticCorrectInfo semanticCorrectInfo, Long modelId, SemanticParseInfo parseInfo) {
|
||||
|
||||
String correctorSql = semanticCorrectInfo.getSql();
|
||||
parseInfo.getSqlInfo().setLogicSql(correctorSql);
|
||||
|
||||
List<FilterExpression> expressions = SqlParserSelectHelper.getFilterExpression(correctorSql);
|
||||
//set dataInfo
|
||||
try {
|
||||
if (!CollectionUtils.isEmpty(expressions)) {
|
||||
DateConf dateInfo = getDateInfo(expressions);
|
||||
parseInfo.setDateInfo(dateInfo);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("set dateInfo error :", e);
|
||||
}
|
||||
|
||||
//set filter
|
||||
try {
|
||||
Map<String, SchemaElement> fieldNameToElement = getNameToElement(modelId);
|
||||
List<QueryFilter> result = getDimensionFilter(fieldNameToElement, expressions);
|
||||
parseInfo.getDimensionFilters().addAll(result);
|
||||
} catch (Exception e) {
|
||||
log.error("set dimensionFilter error :", e);
|
||||
}
|
||||
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
|
||||
if (Objects.isNull(semanticSchema)) {
|
||||
return;
|
||||
}
|
||||
List<String> allFields = getFieldsExceptDate(SqlParserSelectHelper.getAllFields(semanticCorrectInfo.getSql()));
|
||||
|
||||
Set<SchemaElement> metrics = getElements(modelId, allFields, semanticSchema.getMetrics());
|
||||
parseInfo.setMetrics(metrics);
|
||||
|
||||
if (SqlParserSelectFunctionHelper.hasAggregateFunction(semanticCorrectInfo.getSql())) {
|
||||
parseInfo.setNativeQuery(false);
|
||||
List<String> groupByFields = SqlParserSelectHelper.getGroupByFields(semanticCorrectInfo.getSql());
|
||||
List<String> groupByDimensions = getFieldsExceptDate(groupByFields);
|
||||
parseInfo.setDimensions(getElements(modelId, groupByDimensions, semanticSchema.getDimensions()));
|
||||
} else {
|
||||
parseInfo.setNativeQuery(true);
|
||||
List<String> selectFields = SqlParserSelectHelper.getSelectFields(semanticCorrectInfo.getSql());
|
||||
List<String> selectDimensions = getFieldsExceptDate(selectFields);
|
||||
parseInfo.setDimensions(getElements(modelId, selectDimensions, semanticSchema.getDimensions()));
|
||||
}
|
||||
}
|
||||
|
||||
private List<QueryFilter> getDimensionFilter(Map<String, SchemaElement> fieldNameToElement,
|
||||
List<FilterExpression> filterExpressions) {
|
||||
List<QueryFilter> result = Lists.newArrayList();
|
||||
for (FilterExpression expression : filterExpressions) {
|
||||
QueryFilter dimensionFilter = new QueryFilter();
|
||||
dimensionFilter.setValue(expression.getFieldValue());
|
||||
SchemaElement schemaElement = fieldNameToElement.get(expression.getFieldName());
|
||||
if (Objects.isNull(schemaElement)) {
|
||||
continue;
|
||||
}
|
||||
dimensionFilter.setName(schemaElement.getName());
|
||||
dimensionFilter.setBizName(schemaElement.getBizName());
|
||||
dimensionFilter.setElementID(schemaElement.getId());
|
||||
|
||||
FilterOperatorEnum operatorEnum = FilterOperatorEnum.getSqlOperator(expression.getOperator());
|
||||
dimensionFilter.setOperator(operatorEnum);
|
||||
dimensionFilter.setFunction(expression.getFunction());
|
||||
result.add(dimensionFilter);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private DateConf getDateInfo(List<FilterExpression> filterExpressions) {
|
||||
List<FilterExpression> dateExpressions = filterExpressions.stream()
|
||||
.filter(expression -> DateUtils.DATE_FIELD.equalsIgnoreCase(expression.getFieldName()))
|
||||
.collect(Collectors.toList());
|
||||
if (CollectionUtils.isEmpty(dateExpressions)) {
|
||||
return new DateConf();
|
||||
}
|
||||
DateConf dateInfo = new DateConf();
|
||||
dateInfo.setDateMode(DateMode.BETWEEN);
|
||||
FilterExpression firstExpression = dateExpressions.get(0);
|
||||
|
||||
FilterOperatorEnum firstOperator = FilterOperatorEnum.getSqlOperator(firstExpression.getOperator());
|
||||
if (FilterOperatorEnum.EQUALS.equals(firstOperator) && Objects.nonNull(firstExpression.getFieldValue())) {
|
||||
dateInfo.setStartDate(firstExpression.getFieldValue().toString());
|
||||
dateInfo.setEndDate(firstExpression.getFieldValue().toString());
|
||||
dateInfo.setDateMode(DateMode.BETWEEN);
|
||||
return dateInfo;
|
||||
}
|
||||
if (containOperators(firstExpression, firstOperator, FilterOperatorEnum.GREATER_THAN,
|
||||
FilterOperatorEnum.GREATER_THAN_EQUALS)) {
|
||||
dateInfo.setStartDate(firstExpression.getFieldValue().toString());
|
||||
if (hasSecondDate(dateExpressions)) {
|
||||
dateInfo.setEndDate(dateExpressions.get(1).getFieldValue().toString());
|
||||
}
|
||||
}
|
||||
if (containOperators(firstExpression, firstOperator, FilterOperatorEnum.MINOR_THAN,
|
||||
FilterOperatorEnum.MINOR_THAN_EQUALS)) {
|
||||
dateInfo.setEndDate(firstExpression.getFieldValue().toString());
|
||||
if (hasSecondDate(dateExpressions)) {
|
||||
dateInfo.setStartDate(dateExpressions.get(1).getFieldValue().toString());
|
||||
}
|
||||
}
|
||||
return dateInfo;
|
||||
}
|
||||
|
||||
private boolean containOperators(FilterExpression expression, FilterOperatorEnum firstOperator,
|
||||
FilterOperatorEnum... operatorEnums) {
|
||||
return (Arrays.asList(operatorEnums).contains(firstOperator) && Objects.nonNull(expression.getFieldValue()));
|
||||
}
|
||||
|
||||
private boolean hasSecondDate(List<FilterExpression> dateExpressions) {
|
||||
return dateExpressions.size() > 1 && Objects.nonNull(dateExpressions.get(1).getFieldValue());
|
||||
}
|
||||
|
||||
private SemanticCorrectInfo getCorrectorSql(QueryContext queryCtx, SemanticParseInfo parseInfo, String sql) {
|
||||
|
||||
SemanticCorrectInfo correctInfo = SemanticCorrectInfo.builder()
|
||||
.queryFilters(queryCtx.getRequest().getQueryFilters()).sql(sql)
|
||||
.parseInfo(parseInfo).build();
|
||||
|
||||
List<SemanticCorrector> dslCorrections = ComponentFactory.getSqlCorrections();
|
||||
|
||||
dslCorrections.forEach(dslCorrection -> {
|
||||
try {
|
||||
dslCorrection.correct(correctInfo);
|
||||
log.info("sqlCorrection:{} sql:{}", dslCorrection.getClass().getSimpleName(), correctInfo.getSql());
|
||||
} catch (Exception e) {
|
||||
log.error(String.format("correct error,correctInfo:%s", correctInfo), e);
|
||||
}
|
||||
});
|
||||
return correctInfo;
|
||||
}
|
||||
|
||||
private SemanticParseInfo getParseInfo(QueryContext queryCtx, Long modelId, DslTool dslTool,
|
||||
DSLParseResult dslParseResult) {
|
||||
PluginSemanticQuery semanticQuery = QueryManager.createPluginQuery(DslQuery.QUERY_MODE);
|
||||
SemanticParseInfo parseInfo = semanticQuery.getParseInfo();
|
||||
parseInfo.getElementMatches().addAll(queryCtx.getMapInfo().getMatchedElements(modelId));
|
||||
|
||||
Map<String, Object> properties = new HashMap<>();
|
||||
properties.put(Constants.CONTEXT, dslParseResult);
|
||||
properties.put("type", "internal");
|
||||
properties.put("name", dslTool.getName());
|
||||
|
||||
parseInfo.setProperties(properties);
|
||||
parseInfo.setScore(queryCtx.getRequest().getQueryText().length());
|
||||
parseInfo.setQueryMode(semanticQuery.getQueryMode());
|
||||
parseInfo.getSqlInfo().setLlmParseSql(dslParseResult.getLlmResp().getSqlOutput());
|
||||
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
Map<Long, String> modelIdToName = semanticSchema.getModelIdToName();
|
||||
|
||||
SchemaElement model = new SchemaElement();
|
||||
model.setModel(modelId);
|
||||
model.setId(modelId);
|
||||
model.setName(modelIdToName.get(modelId));
|
||||
parseInfo.setModel(model);
|
||||
queryCtx.getCandidateQueries().add(semanticQuery);
|
||||
return parseInfo;
|
||||
}
|
||||
|
||||
private DslTool getDslTool(QueryReq request, Long modelId) {
|
||||
AgentService agentService = ContextUtils.getBean(AgentService.class);
|
||||
List<DslTool> dslTools = agentService.getDslTools(request.getAgentId(), AgentToolType.DSL);
|
||||
Optional<DslTool> dslToolOptional = dslTools.stream()
|
||||
.filter(tool -> {
|
||||
List<Long> modelIds = tool.getModelIds();
|
||||
if (agentService.containsAllModel(new HashSet<>(modelIds))) {
|
||||
return true;
|
||||
}
|
||||
return modelIds.contains(modelId);
|
||||
})
|
||||
.findFirst();
|
||||
return dslToolOptional.orElse(null);
|
||||
}
|
||||
|
||||
private Long getModelId(QueryContext queryCtx, ChatContext chatCtx, Integer agentId) {
|
||||
AgentService agentService = ContextUtils.getBean(AgentService.class);
|
||||
Set<Long> distinctModelIds = agentService.getDslToolsModelIds(agentId, AgentToolType.DSL);
|
||||
if (agentService.containsAllModel(distinctModelIds)) {
|
||||
distinctModelIds = new HashSet<>();
|
||||
}
|
||||
ModelResolver modelResolver = ComponentFactory.getModelResolver();
|
||||
Long modelId = modelResolver.resolve(queryCtx, chatCtx, distinctModelIds);
|
||||
log.info("resolve modelId:{},dslModels:{}", modelId, distinctModelIds);
|
||||
return modelId;
|
||||
}
|
||||
|
||||
private LLMResp requestLLM(LLMReq llmReq, Long modelId, LLMParserConfig llmParserConfig) {
|
||||
String questUrl = llmParserConfig.getUrl() + llmParserConfig.getQueryToSqlPath();
|
||||
long startTime = System.currentTimeMillis();
|
||||
log.info("requestLLM request, modelId:{},llmReq:{}", modelId, llmReq);
|
||||
RestTemplate restTemplate = ContextUtils.getBean(RestTemplate.class);
|
||||
try {
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.setContentType(MediaType.APPLICATION_JSON);
|
||||
HttpEntity<String> entity = new HttpEntity<>(JsonUtil.toString(llmReq), headers);
|
||||
ResponseEntity<LLMResp> responseEntity = restTemplate.exchange(questUrl, HttpMethod.POST, entity,
|
||||
LLMResp.class);
|
||||
|
||||
log.info("requestLLM response,cost:{}, questUrl:{} \n entity:{} \n body:{}",
|
||||
System.currentTimeMillis() - startTime, questUrl, entity, responseEntity.getBody());
|
||||
return responseEntity.getBody();
|
||||
} catch (Exception e) {
|
||||
log.error("requestLLM error", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private LLMReq getLlmReq(QueryContext queryCtx, Long modelId, LLMParserConfig llmParserConfig) {
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
Map<Long, String> modelIdToName = semanticSchema.getModelIdToName();
|
||||
String queryText = queryCtx.getRequest().getQueryText();
|
||||
|
||||
LLMReq llmReq = new LLMReq();
|
||||
llmReq.setQueryText(queryText);
|
||||
|
||||
LLMReq.LLMSchema llmSchema = new LLMReq.LLMSchema();
|
||||
llmSchema.setModelName(modelIdToName.get(modelId));
|
||||
llmSchema.setDomainName(modelIdToName.get(modelId));
|
||||
|
||||
List<String> fieldNameList = getFieldNameList(queryCtx, modelId, semanticSchema, llmParserConfig);
|
||||
|
||||
fieldNameList.add(DateUtils.DATE_FIELD);
|
||||
llmSchema.setFieldNameList(fieldNameList);
|
||||
llmReq.setSchema(llmSchema);
|
||||
|
||||
List<ElementValue> linking = new ArrayList<>();
|
||||
linking.addAll(getValueList(queryCtx, modelId, semanticSchema));
|
||||
llmReq.setLinking(linking);
|
||||
|
||||
String currentDate = DSLDateHelper.getReferenceDate(modelId);
|
||||
llmReq.setCurrentDate(currentDate);
|
||||
return llmReq;
|
||||
}
|
||||
|
||||
protected List<ElementValue> getValueList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema) {
|
||||
Map<Long, String> itemIdToName = getItemIdToName(modelId, semanticSchema);
|
||||
|
||||
List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(modelId);
|
||||
if (CollectionUtils.isEmpty(matchedElements)) {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
Set<ElementValue> valueMatches = matchedElements
|
||||
.stream()
|
||||
.filter(elementMatch -> !elementMatch.isInherited())
|
||||
.filter(schemaElementMatch -> {
|
||||
SchemaElementType type = schemaElementMatch.getElement().getType();
|
||||
return SchemaElementType.VALUE.equals(type) || SchemaElementType.ID.equals(type);
|
||||
})
|
||||
.map(elementMatch -> {
|
||||
ElementValue elementValue = new ElementValue();
|
||||
elementValue.setFieldName(itemIdToName.get(elementMatch.getElement().getId()));
|
||||
elementValue.setFieldValue(elementMatch.getWord());
|
||||
return elementValue;
|
||||
}).collect(Collectors.toSet());
|
||||
return new ArrayList<>(valueMatches);
|
||||
}
|
||||
|
||||
|
||||
protected Map<String, SchemaElement> getNameToElement(Long modelId) {
|
||||
SemanticSchema semanticSchema = ContextUtils.getBean(SchemaService.class).getSemanticSchema();
|
||||
List<SchemaElement> dimensions = semanticSchema.getDimensions();
|
||||
List<SchemaElement> metrics = semanticSchema.getMetrics();
|
||||
|
||||
List<SchemaElement> allElements = Lists.newArrayList();
|
||||
allElements.addAll(dimensions);
|
||||
allElements.addAll(metrics);
|
||||
return allElements.stream()
|
||||
.filter(schemaElement -> schemaElement.getModel().equals(modelId))
|
||||
.collect(Collectors.toMap(SchemaElement::getName, Function.identity(), (value1, value2) -> value2));
|
||||
}
|
||||
|
||||
|
||||
protected List<String> getFieldNameList(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema,
|
||||
LLMParserConfig llmParserConfig) {
|
||||
|
||||
Set<String> results = getTopNFieldNames(modelId, semanticSchema, llmParserConfig);
|
||||
|
||||
Set<String> fieldNameList = getMatchedFieldNames(queryCtx, modelId, semanticSchema);
|
||||
|
||||
results.addAll(fieldNameList);
|
||||
return new ArrayList<>(results);
|
||||
}
|
||||
|
||||
protected Set<String> getMatchedFieldNames(QueryContext queryCtx, Long modelId, SemanticSchema semanticSchema) {
|
||||
Map<Long, String> itemIdToName = getItemIdToName(modelId, semanticSchema);
|
||||
List<SchemaElementMatch> matchedElements = queryCtx.getMapInfo().getMatchedElements(modelId);
|
||||
if (CollectionUtils.isEmpty(matchedElements)) {
|
||||
return new HashSet<>();
|
||||
}
|
||||
Set<String> fieldNameList = matchedElements.stream()
|
||||
.filter(schemaElementMatch -> {
|
||||
SchemaElementType elementType = schemaElementMatch.getElement().getType();
|
||||
return SchemaElementType.METRIC.equals(elementType)
|
||||
|| SchemaElementType.DIMENSION.equals(elementType)
|
||||
|| SchemaElementType.VALUE.equals(elementType);
|
||||
})
|
||||
.map(schemaElementMatch -> {
|
||||
SchemaElementType elementType = schemaElementMatch.getElement().getType();
|
||||
|
||||
if (!SchemaElementType.VALUE.equals(elementType)) {
|
||||
return schemaElementMatch.getWord();
|
||||
}
|
||||
return itemIdToName.get(schemaElementMatch.getElement().getId());
|
||||
})
|
||||
.filter(name -> StringUtils.isNotEmpty(name) && !name.contains("%"))
|
||||
.collect(Collectors.toSet());
|
||||
return fieldNameList;
|
||||
}
|
||||
|
||||
private Set<String> getTopNFieldNames(Long modelId, SemanticSchema semanticSchema,
|
||||
LLMParserConfig llmParserConfig) {
|
||||
Set<String> results = semanticSchema.getDimensions(modelId).stream()
|
||||
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
|
||||
.limit(llmParserConfig.getDimensionTopN())
|
||||
.map(entry -> entry.getName())
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
Set<String> metrics = semanticSchema.getMetrics(modelId).stream()
|
||||
.sorted(Comparator.comparing(SchemaElement::getUseCnt).reversed())
|
||||
.limit(llmParserConfig.getMetricTopN())
|
||||
.map(entry -> entry.getName())
|
||||
.collect(Collectors.toSet());
|
||||
|
||||
results.addAll(metrics);
|
||||
return results;
|
||||
}
|
||||
|
||||
protected Map<Long, String> getItemIdToName(Long modelId, SemanticSchema semanticSchema) {
|
||||
return semanticSchema.getDimensions(modelId).stream()
|
||||
.collect(Collectors.toMap(SchemaElement::getId, SchemaElement::getName, (value1, value2) -> value2));
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,157 +0,0 @@
|
||||
package com.tencent.supersonic.chat.parser.llm.interpret;
|
||||
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.tencent.supersonic.chat.agent.Agent;
|
||||
import com.tencent.supersonic.chat.agent.tool.AgentToolType;
|
||||
import com.tencent.supersonic.chat.agent.tool.MetricInterpretTool;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticInterpreter;
|
||||
import com.tencent.supersonic.chat.api.component.SemanticParser;
|
||||
import com.tencent.supersonic.chat.api.pojo.QueryContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.ChatContext;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementMatch;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElement;
|
||||
import com.tencent.supersonic.chat.api.pojo.SchemaElementType;
|
||||
import com.tencent.supersonic.chat.api.pojo.SemanticParseInfo;
|
||||
import com.tencent.supersonic.chat.api.pojo.ModelSchema;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryFilter;
|
||||
import com.tencent.supersonic.chat.api.pojo.request.QueryReq;
|
||||
import com.tencent.supersonic.chat.parser.SatisfactionChecker;
|
||||
import com.tencent.supersonic.chat.query.llm.interpret.MetricInterpretQuery;
|
||||
import com.tencent.supersonic.chat.query.QueryManager;
|
||||
import com.tencent.supersonic.chat.query.plugin.PluginSemanticQuery;
|
||||
import com.tencent.supersonic.chat.service.AgentService;
|
||||
import com.tencent.supersonic.chat.utils.ComponentFactory;
|
||||
import com.tencent.supersonic.common.pojo.DateConf;
|
||||
import com.tencent.supersonic.common.util.ContextUtils;
|
||||
import com.tencent.supersonic.semantic.api.model.enums.TimeDimensionEnum;
|
||||
import com.tencent.supersonic.semantic.api.query.enums.FilterOperatorEnum;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.HashMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Slf4j
|
||||
public class MetricInterpretParser implements SemanticParser {
|
||||
|
||||
@Override
|
||||
public void parse(QueryContext queryContext, ChatContext chatContext) {
|
||||
if (SatisfactionChecker.check(queryContext)) {
|
||||
log.info("skip MetricInterpretParser");
|
||||
return;
|
||||
}
|
||||
Map<Long, MetricInterpretTool> metricInterpretToolMap =
|
||||
getMetricInterpretTools(queryContext.getRequest().getAgentId());
|
||||
log.info("metric interpret tool : {}", metricInterpretToolMap);
|
||||
if (CollectionUtils.isEmpty(metricInterpretToolMap)) {
|
||||
return;
|
||||
}
|
||||
Map<Long, List<SchemaElementMatch>> elementMatches = queryContext.getMapInfo().getModelElementMatches();
|
||||
for (Long modelId : elementMatches.keySet()) {
|
||||
MetricInterpretTool metricInterpretTool = metricInterpretToolMap.get(modelId);
|
||||
if (metricInterpretTool == null) {
|
||||
continue;
|
||||
}
|
||||
if (CollectionUtils.isEmpty(elementMatches.get(modelId))) {
|
||||
continue;
|
||||
}
|
||||
List<MetricOption> metricOptions = metricInterpretTool.getMetricOptions();
|
||||
if (!CollectionUtils.isEmpty(metricOptions)) {
|
||||
List<Long> metricIds = metricOptions.stream()
|
||||
.map(MetricOption::getMetricId).collect(Collectors.toList());
|
||||
String name = metricInterpretTool.getName();
|
||||
buildQuery(modelId, queryContext, metricIds, elementMatches.get(modelId), name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void buildQuery(Long modelId, QueryContext queryContext,
|
||||
List<Long> metricIds, List<SchemaElementMatch> schemaElementMatches, String toolName) {
|
||||
PluginSemanticQuery metricInterpretQuery = QueryManager.createPluginQuery(MetricInterpretQuery.QUERY_MODE);
|
||||
Set<SchemaElement> metrics = getMetrics(metricIds, modelId);
|
||||
SemanticParseInfo semanticParseInfo = buildSemanticParseInfo(modelId, queryContext.getRequest(),
|
||||
metrics, schemaElementMatches, toolName);
|
||||
semanticParseInfo.setQueryMode(metricInterpretQuery.getQueryMode());
|
||||
semanticParseInfo.getProperties().put("queryText", queryContext.getRequest().getQueryText());
|
||||
metricInterpretQuery.setParseInfo(semanticParseInfo);
|
||||
queryContext.getCandidateQueries().add(metricInterpretQuery);
|
||||
}
|
||||
|
||||
public Set<SchemaElement> getMetrics(List<Long> metricIds, Long modelId) {
|
||||
SemanticInterpreter semanticInterpreter = ComponentFactory.getSemanticLayer();
|
||||
ModelSchema modelSchema = semanticInterpreter.getModelSchema(modelId, true);
|
||||
Set<SchemaElement> metrics = modelSchema.getMetrics();
|
||||
return metrics.stream().filter(schemaElement -> metricIds.contains(schemaElement.getId()))
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
private Map<Long, MetricInterpretTool> getMetricInterpretTools(Integer agentId) {
|
||||
AgentService agentService = ContextUtils.getBean(AgentService.class);
|
||||
Agent agent = agentService.getAgent(agentId);
|
||||
if (agent == null) {
|
||||
return new HashMap<>();
|
||||
}
|
||||
List<String> tools = agent.getTools(AgentToolType.INTERPRET);
|
||||
if (CollectionUtils.isEmpty(tools)) {
|
||||
return new HashMap<>();
|
||||
}
|
||||
List<MetricInterpretTool> metricInterpretTools = tools.stream().map(tool ->
|
||||
JSONObject.parseObject(tool, MetricInterpretTool.class))
|
||||
.filter(tool -> !CollectionUtils.isEmpty(tool.getMetricOptions()))
|
||||
.collect(Collectors.toList());
|
||||
Map<Long, MetricInterpretTool> metricInterpretToolMap = new HashMap<>();
|
||||
for (MetricInterpretTool metricInterpretTool : metricInterpretTools) {
|
||||
metricInterpretToolMap.putIfAbsent(metricInterpretTool.getModelId(),
|
||||
metricInterpretTool);
|
||||
}
|
||||
return metricInterpretToolMap;
|
||||
}
|
||||
|
||||
private SemanticParseInfo buildSemanticParseInfo(Long modelId, QueryReq queryReq, Set<SchemaElement> metrics,
|
||||
List<SchemaElementMatch> schemaElementMatches, String toolName) {
|
||||
SchemaElement model = new SchemaElement();
|
||||
model.setModel(modelId);
|
||||
model.setId(modelId);
|
||||
SemanticParseInfo semanticParseInfo = new SemanticParseInfo();
|
||||
semanticParseInfo.setMetrics(metrics);
|
||||
SchemaElement dimension = new SchemaElement();
|
||||
dimension.setBizName(TimeDimensionEnum.DAY.getName());
|
||||
semanticParseInfo.setDimensions(Sets.newHashSet(dimension));
|
||||
semanticParseInfo.setElementMatches(schemaElementMatches);
|
||||
semanticParseInfo.setModel(model);
|
||||
semanticParseInfo.setScore(queryReq.getQueryText().length());
|
||||
DateConf dateConf = new DateConf();
|
||||
dateConf.setDateMode(DateConf.DateMode.RECENT);
|
||||
dateConf.setUnit(15);
|
||||
semanticParseInfo.setDateInfo(dateConf);
|
||||
Map<String, Object> properties = new HashMap<>();
|
||||
properties.put("type", "internal");
|
||||
properties.put("name", toolName);
|
||||
semanticParseInfo.setProperties(properties);
|
||||
fillSemanticParseInfo(semanticParseInfo);
|
||||
return semanticParseInfo;
|
||||
}
|
||||
|
||||
private void fillSemanticParseInfo(SemanticParseInfo semanticParseInfo) {
|
||||
List<SchemaElementMatch> schemaElementMatches = semanticParseInfo.getElementMatches();
|
||||
if (!CollectionUtils.isEmpty(schemaElementMatches)) {
|
||||
schemaElementMatches.stream().filter(schemaElementMatch ->
|
||||
SchemaElementType.VALUE.equals(schemaElementMatch.getElement().getType())
|
||||
|| SchemaElementType.ID.equals(schemaElementMatch.getElement().getType()))
|
||||
.forEach(schemaElementMatch -> {
|
||||
QueryFilter queryFilter = new QueryFilter();
|
||||
queryFilter.setValue(schemaElementMatch.getWord());
|
||||
queryFilter.setElementID(schemaElementMatch.getElement().getId());
|
||||
queryFilter.setName(schemaElementMatch.getElement().getName());
|
||||
queryFilter.setOperator(FilterOperatorEnum.EQUALS);
|
||||
queryFilter.setBizName(schemaElementMatch.getElement().getBizName());
|
||||
semanticParseInfo.getDimensionFilters().add(queryFilter);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user