mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
(feat) add benchmark test (#1227)
This commit is contained in:
83
benchmark/benchmark.md
Normal file
83
benchmark/benchmark.md
Normal file
@@ -0,0 +1,83 @@
|
||||
## 使用场景
|
||||
产品上线阶段批量测试问答对话的问题,统计测试结果。
|
||||
注意:benchmark 与 evaluation 模块的区别:evaluation 是基于构建好的数据集,对多个模型做横向评估;benchmark 是在选定模型下,对业务问题做批量自动化测试。
|
||||
## 功能说明
|
||||
批量自动化测试问答对话,目前支持单轮问答测试。
|
||||
|
||||
## 使用说明
|
||||
注意:建议在开发/测试环境执行;如果需要在生产环境测试,请避开用户使用高峰期。
|
||||
1. 准备测试问题
|
||||
|
||||
将问题写入`test_data.csv`文件,格式如下:
|
||||
```csv
|
||||
question
|
||||
各BG期间在职、入职、离职人员的平均薪资是多少?(注意:薪资不包括香港视源、广视以及并购控股子公司青松、仙视的数据。)
|
||||
各BG期间入职且仍在职的人数有多少?
|
||||
各BG当月的净增长人数及其增长率是多少?
|
||||
```
|
||||
将文件放入`benchmark/data`目录下。
|
||||
|
||||
2. 执行测试
|
||||
```bash
|
||||
python benchmark.py -u http://localhost:3100 -a 6 -c 141 -f data/renli.csv -p zds
|
||||
```
|
||||
参数说明:
|
||||
- -a: agentId,助手ID
|
||||
- -c: chat_id
|
||||
- -f: 测试问题文件
|
||||
- -u: 问答系统url,例如:http://localhost:3100
- -p: userName,用户名,用于生成登录token
|
||||
如果执行时报错,提示缺少相关python包,可以执行`pip install -r requirements.txt`安装依赖。
|
||||
|
||||
3. 查看测试结果
|
||||
当前,只能在数据库中查看测试结果。
|
||||
```sql
|
||||
select question_id,chat_id,create_time,query_text,
|
||||
JSON_EXTRACT(parse_info,'$.sqlInfo.s2SQL') as s2sql,
|
||||
JSON_EXTRACT(parse_info,'$.sqlInfo.correctS2SQL') as correctS2SQL,
|
||||
JSON_EXTRACT(parse_info,'$.sqlInfo.querySQL') as querySQL,
|
||||
'请标记正确的SQL' as correctSQL,
|
||||
'请标记生成SQL是否正确' as isOk,
|
||||
'请分类不正确的原因' as reason
|
||||
from s2_chat_parse scp where user_name = 'zhaodongsheng' and chat_id = '141';
|
||||
|
||||
select question_id,chat_id,create_time,query_text,
|
||||
JSON_EXTRACT(query_result,'$.querySql') as querySql,
|
||||
JSON_EXTRACT(query_result,'$.queryResults') as queryResults
|
||||
from s2_chat_query where user_name = 'zhaodongsheng' and chat_id = '141' and query_state = 1;
|
||||
|
||||
```
|
||||
4. 查看帮助
|
||||
```bash
|
||||
python benchmark.py --help
|
||||
usage: benchmark.py [-h] -u URL -a AGENTID -c CHATID -f FILEPATH -p USERNAME
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-u URL, --url URL url:问答系统url,例如:https://chatdata-dev.test.com
|
||||
-a AGENTID, --agentId AGENTID
|
||||
agentId:助手ID
|
||||
-c CHATID, --chatId CHATID
|
||||
chatId:会话ID,需要通过浏览器开发者模式获取
|
||||
-f FILEPATH, --filePath FILEPATH
|
||||
filePath:问题文件路径, csv格式. 请提前上传到benchmark/data目录下
|
||||
-p USERNAME, --userName USERNAME
|
||||
userName:用户名,用户获取登录token
|
||||
```
|
||||
|
||||
## 演示效果
|
||||
```bash
|
||||
python benchmark.py -u https://chatdata-dev.test.com -a 3 -c 35 -f data/shuce.csv -p zds
|
||||
批量测试配置信息[url: https://chatdata-dev.test.com agentId: 3 chatId: 35 filePath: data/shuce.csv userName: zds ]
|
||||
请确认输入的压力测试信息是否正确:
|
||||
1. Yes
|
||||
2. No
|
||||
1
|
||||
start to ask question: 各BG期间在职、入职、离职人员的平均薪资是多少?(注意:薪资不包括香港视源、广视以及并购控股子公司青松、仙视的数据。)
|
||||
start to ask question: 各BG期间入职且仍在职的人数有多少?
|
||||
start to ask question: 各BG当月的净增长人数及其增长率是多少?
|
||||
```
|
||||
|
||||
## TODO
|
||||
- [x] 问答对话测试
|
||||
- [ ] 多轮对话测试
|
||||
- [ ] 问答对话测试结果展示
|
||||
104
benchmark/benchmark.py
Normal file
104
benchmark/benchmark.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- encoding: utf-8 -*-
|
||||
# -----------------------------------------------------------------------------------
|
||||
'''
|
||||
@filename : benchmark.py
|
||||
@time : 2024/06/20
|
||||
@author : zhaodongsheng
|
||||
@Version : 1.0
|
||||
@description : 批量问答测试
|
||||
'''
|
||||
# -----------------------------------------------------------------------------------
|
||||
import pandas as pd
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
import jwt
|
||||
import traceback
|
||||
|
||||
class BatchTest:
    """Client that sends questions to the chat query API for batch testing.

    Requests go to the ``parse`` and ``execute`` endpoints under
    ``<url>/api/chat/query/`` and carry a bearer token that is generated
    locally for ``userName``.
    """

    # Default number of seconds to wait for an HTTP response. Without a
    # timeout a single stalled request would block the whole batch forever.
    DEFAULT_TIMEOUT = 300

    def __init__(self, url, agentId, chatId, userName, timeout=DEFAULT_TIMEOUT):
        """Store the connection settings and create the auth token.

        Args:
            url: Base URL of the chat system, with or without a trailing '/'.
            agentId: Agent id sent with every parse request.
            chatId: Chat id sent with every parse and execute request.
            userName: User name embedded in the generated JWT.
            timeout: Value passed to ``requests.post`` as ``timeout``.
        """
        self.base_url = self._build_base_url(url)
        self.agentId = agentId
        self.auth_token = self.__get_authorization(userName)
        self.chatId = chatId
        self.timeout = timeout

    @staticmethod
    def _build_base_url(url):
        """Return the query API prefix for ``url``.

        Trailing slashes are dropped first so that 'http://host/' does not
        turn into 'http://host//api/chat/query/'.
        """
        return url.rstrip('/') + '/api/chat/query/'

    def _post(self, endpoint, payload):
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded body.

        Raises whatever ``requests.post`` or ``response.json()`` raise
        (connection errors, timeouts, invalid JSON).
        """
        headers = {
            'Authorization': 'Bearer ' + self.auth_token,
            'Content-Type': 'application/json',
        }
        response = requests.post(
            self.base_url + endpoint,
            headers=headers,
            data=json.dumps(payload),
            timeout=self.timeout,
        )
        return response.json()

    def parse(self, query_text):
        """Send ``query_text`` to the ``parse`` endpoint.

        Returns:
            The decoded JSON response.
        """
        data = {
            'queryText': query_text,
            'agentId': self.agentId,
            'chatId': self.chatId,
        }
        return self._post('parse', data)

    def execute(self, query_text, queryId):
        """Send ``query_text`` to the ``execute`` endpoint for ``queryId``.

        Returns:
            The decoded JSON response.
        """
        data = {
            'queryText': query_text,
            # NOTE(review): parseId is hard-coded; presumably it selects the
            # first parse candidate of the query - confirm against the API.
            'parseId': 1,
            'chatId': self.chatId,
            'queryId': queryId,
        }
        return self._post('execute', data)

    def read_question_from_csv(self, filePath):
        """Load the questions file with ``pandas.read_csv`` and return the DataFrame."""
        return pd.read_csv(filePath)

    def __get_authorization(self, userName):
        """Build an HS512-signed JWT for ``userName`` and return it as ``str``."""
        # The secret must be kept identical to
        # com.tencent.supersonic.auth.api.authentication.config.AuthenticationConfig.tokenAppSecret
        secret = "WIaO9YRRVt+7QtpPvyWsARFngnEcbaKBk783uGFwMrbJBaochsqCH62L4Kijcb0sZCYoSsiKGV/zPml5MnZ3uQ=="
        # Expiry is set 100000000 seconds after the moment of creation.
        exp = time.time() + 100000000
        token = jwt.encode({"token_userName": userName, "exp": exp}, secret, algorithm="HS512")
        # PyJWT 1.x returns bytes while 2.x returns str; normalise so that
        # building the "Bearer ..." header never fails with a TypeError.
        if isinstance(token, bytes):
            token = token.decode('utf-8')
        return token
|
||||
|
||||
|
||||
def benchmark(url:str, agentId:str, chatId:str, filePath:str, userName:str):
    """Ask every question listed in the CSV file, one after another.

    Each row's ``question`` value is sent to ``parse`` and then to
    ``execute`` using the ``queryId`` from the parse response. A failing
    question is reported on stdout and the run moves on to the next row.
    """
    runner = BatchTest(url, agentId, chatId, userName)
    questions = runner.read_question_from_csv(filePath)
    for _, record in questions.iterrows():
        text = record['question']
        print('start to ask question:', text)
        # Catch everything so that one bad question does not abort the batch.
        try:
            parsed = runner.parse(text)
            runner.execute(text, parsed['data']['queryId'])
        except Exception as err:
            print('error:', err)
            traceback.print_exc()
        else:
            # Pause between successful questions only.
            time.sleep(1)
|
||||
|
||||
if __name__ == '__main__':
    import argparse

    # (short flag, long flag, help text) for every mandatory string option.
    option_table = (
        ('-u', '--url', 'url:问答系统url,例如:https://chatdata-dev.test.com'),
        ('-a', '--agentId', 'agentId:助手ID'),
        ('-c', '--chatId', 'chatId:会话ID,需要通过浏览器开发者模式获取'),
        ('-f', '--filePath', 'filePath:问题文件路径, csv格式. 请提前上传到benchmark/data目录下'),
        ('-p', '--userName', 'userName:用户名,用户获取登录token'),
    )
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, help_text in option_table:
        parser.add_argument(short_flag, long_flag, type=str, required=True, help=help_text)
    args = parser.parse_args()

    # Echo the settings and ask for confirmation before sending any request.
    print('批量测试配置信息[url:', args.url,'agentId:', args.agentId, 'chatId:', args.chatId, 'filePath:', args.filePath, 'userName:', args.userName, ']')
    for prompt_line in ('请确认输入的压力测试信息是否正确:', '1. Yes', '2. No'):
        print(prompt_line)
    confirm = input()
    if confirm not in ('1', 'Yes', 'yes', 'YES'):
        print('请重新输入压力测试配置信息: url, agentId, chatId, filePath, userName')
    else:
        benchmark(args.url, args.agentId, args.chatId, args.filePath, args.userName)
|
||||
3
benchmark/data/caiwu.csv
Normal file
3
benchmark/data/caiwu.csv
Normal file
@@ -0,0 +1,3 @@
|
||||
question
|
||||
每个业务组(BG)的员工人数是多少?
|
||||
每个业务组的损益情况如何?
|
||||
|
8
benchmark/data/renli.csv
Normal file
8
benchmark/data/renli.csv
Normal file
@@ -0,0 +1,8 @@
|
||||
question
|
||||
在职人员的男女比例是多少?
|
||||
期间入职且离职的人数及其占比如何?
|
||||
期间新入职社招人员的平均年龄是多少?
|
||||
期间入职且在职的人数有多少?
|
||||
期间在职人员的平均年龄是多少?
|
||||
当月的净增长人数及其增长率是多少?
|
||||
期间新入职社招人员的年龄分布情况如何?
|
||||
|
4
benchmark/data/shuce.csv
Normal file
4
benchmark/data/shuce.csv
Normal file
@@ -0,0 +1,4 @@
|
||||
question
|
||||
在广东省内,哪一个学校的累计集备数最多,请返回该学校的学校名称
|
||||
在广东省内,哪一个学校的累计集体备课数最多,请返回该学校的学校名称
|
||||
|
||||
|
5
benchmark/requirements.txt
Normal file
5
benchmark/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
pandas==2.0.3
|
||||
PyJWT==2.8.0
|
||||
requests==2.28.2
|
||||
|
||||
|
||||
Reference in New Issue
Block a user