mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-10 11:07:06 +00:00
(feat) add benchmark test (#1227)
This commit is contained in:
83
benchmark/benchmark.md
Normal file
83
benchmark/benchmark.md
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
## 使用场景
|
||||||
|
产品上线阶段批量测试问答对话的问题,统计测试结果。
|
||||||
|
注意:与evaluation模块的区别,evaluation是构建数据集多个模型的横向评估,benchmark是选定模型下,批量自动化业务问题的测试。
|
||||||
|
## 功能说明
|
||||||
|
批量自动化测试问答对话测试,支持单轮问答测试。
|
||||||
|
|
||||||
|
## 使用说明
|
||||||
|
注意:建议在开发测试环境执行;如果需要在生产环境测试,请避开用户使用高峰期。
|
||||||
|
1. 准备测试问题
|
||||||
|
|
||||||
|
将问题写入`test_data.csv`文件,格式如下:
|
||||||
|
```csv
|
||||||
|
question
|
||||||
|
各BG期间在职、入职、离职人员的平均薪资是多少?(注意:薪资不包括香港视源、广视以及并购控股子公司青松、仙视的数据。)
|
||||||
|
各BG期间入职且仍在职的人数有多少?
|
||||||
|
各BG当月的净增长人数及其增长率是多少?
|
||||||
|
```
|
||||||
|
将文件放入`benchmark/data`目录下。
|
||||||
|
|
||||||
|
2. 执行测试
|
||||||
|
```bash
|
||||||
|
python benchmark.py -u http://localhost:3100 -a 6 -c 141 -f data/renli.csv -p zds
|
||||||
|
```
|
||||||
|
参数说明:
|
||||||
|
- -a: 助手ID(agentId)
|
||||||
|
- -c: chat_id
|
||||||
|
- -f: 测试问题文件
|
||||||
|
- -u: 问答系统url
- -p: 用户名,用于获取登录token
|
||||||
|
如果执行报错,没有安装相关python包,可以执行`pip install -r requirements.txt`安装相关包。
|
||||||
|
|
||||||
|
3. 查看测试结果
|
||||||
|
当前,只能在数据库中查看测试结果。
|
||||||
|
```sql
|
||||||
|
select question_id,chat_id,create_time,query_text,
|
||||||
|
JSON_EXTRACT(parse_info,'$.sqlInfo.s2SQL') as s2sql,
|
||||||
|
JSON_EXTRACT(parse_info,'$.sqlInfo.correctS2SQL') as correctS2SQL,
|
||||||
|
JSON_EXTRACT(parse_info,'$.sqlInfo.querySQL') as querySQL,
|
||||||
|
'请标记正确的SQL' as correctSQL,
|
||||||
|
'请标记生成SQL是否正确' as isOk,
|
||||||
|
'请分类不正确的原因' as reason
|
||||||
|
from s2_chat_parse scp where user_name = 'zhaodongsheng' and chat_id = '141';
|
||||||
|
|
||||||
|
select question_id,chat_id,create_time,query_text,
|
||||||
|
JSON_EXTRACT(query_result,'$.querySql') as querySql,
|
||||||
|
JSON_EXTRACT(query_result,'$.queryResults') as queryResults
|
||||||
|
from s2_chat_query where user_name = 'zhaodongsheng' and chat_id = '141' and query_state = 1;
|
||||||
|
|
||||||
|
```
|
||||||
|
4. 查看帮助
|
||||||
|
```bash
|
||||||
|
python benchmark.py --help
|
||||||
|
usage: benchmark.py [-h] -u URL -a AGENTID -c CHATID -f FILEPATH -p USERNAME
|
||||||
|
|
||||||
|
optional arguments:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
-u URL, --url URL url:问答系统url,例如:https://chatdata-dev.test.com
|
||||||
|
-a AGENTID, --agentId AGENTID
|
||||||
|
agentId:助手ID
|
||||||
|
-c CHATID, --chatId CHATID
|
||||||
|
chatId:会话ID,需要通过浏览器开发者模式获取
|
||||||
|
-f FILEPATH, --filePath FILEPATH
|
||||||
|
filePath:问题文件路径, csv格式. 请提前上传到benchmark/data目录下
|
||||||
|
-p USERNAME, --userName USERNAME
|
||||||
|
userName:用户名,用户获取登录token
|
||||||
|
```
|
||||||
|
|
||||||
|
## 演示效果
|
||||||
|
```bash
|
||||||
|
python benchmark.py -u https://chatdata-dev.test.com -a 3 -c 35 -f data/shuce.csv -p zds
|
||||||
|
批量测试配置信息[url: https://chatdata-dev.test.com agentId: 3 chatId: 35 filePath: data/shuce.csv userName: zds ]
|
||||||
|
请确认输入的压力测试信息是否正确:
|
||||||
|
1. Yes
|
||||||
|
2. No
|
||||||
|
1
|
||||||
|
start to ask question: 各BG期间在职、入职、离职人员的平均薪资是多少?(注意:薪资不包括香港视源、广视以及并购控股子公司青松、仙视的数据。)
|
||||||
|
start to ask question: 各BG期间入职且仍在职的人数有多少?
|
||||||
|
start to ask question: 各BG当月的净增长人数及其增长率是多少?
|
||||||
|
```
|
||||||
|
|
||||||
|
## TODO
|
||||||
|
- [x] 问答对话测试
|
||||||
|
- [ ] 多轮对话测试
|
||||||
|
- [ ] 问答对话测试结果展示
|
||||||
104
benchmark/benchmark.py
Normal file
104
benchmark/benchmark.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- encoding: utf-8 -*-
|
||||||
|
# -----------------------------------------------------------------------------------
|
||||||
|
'''
|
||||||
|
@filename : benchmark.py
|
||||||
|
@time : 2024/06/20
|
||||||
|
@author : zhaodongsheng
|
||||||
|
@Version : 1.0
|
||||||
|
@description : 批量问答测试
|
||||||
|
'''
|
||||||
|
# -----------------------------------------------------------------------------------
|
||||||
|
import pandas as pd
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import jwt
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
class BatchTest:
    """Thin HTTP client for the chat query API used by the batch test.

    On construction it signs a JWT for ``userName`` and stores it; ``parse``
    and ``execute`` then POST JSON payloads to ``<url>/api/chat/query/parse``
    and ``<url>/api/chat/query/execute`` with that token as a Bearer header.
    """

    # Default signing secret. It must be kept identical to the server-side
    # com.tencent.supersonic.auth.api.authentication.config.AuthenticationConfig.tokenAppSecret
    DEFAULT_SECRET = "WIaO9YRRVt+7QtpPvyWsARFngnEcbaKBk783uGFwMrbJBaochsqCH62L4Kijcb0sZCYoSsiKGV/zPml5MnZ3uQ=="

    def __init__(self, url, agentId, chatId, userName, secret=None, timeout=300):
        """Prepare the endpoint prefix and the authorization token.

        Args:
            url: Base URL of the question-answering system.
            agentId: Agent id sent with every parse request.
            chatId: Chat id sent with every parse/execute request.
            userName: User name embedded in the signed token.
            secret: Optional signing secret; falls back to ``DEFAULT_SECRET``.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        # Strip a trailing slash so "http://host/" does not yield "//api/...".
        self.base_url = url.rstrip('/') + '/api/chat/query/'
        self.agentId = agentId
        self.chatId = chatId
        self.timeout = timeout
        self.secret = self.DEFAULT_SECRET if secret is None else secret
        self.auth_token = self.__get_authorization(userName)

    def _headers(self):
        """Return the HTTP headers shared by every API request."""
        return {
            'Authorization': 'Bearer ' + self.auth_token,
            'Content-Type': 'application/json',
        }

    def _post(self, endpoint, payload):
        """POST ``payload`` as JSON to ``base_url + endpoint``.

        Returns the decoded JSON response body. Raises
        ``requests.HTTPError`` on a 4xx/5xx status and the usual ``requests``
        exceptions on connection failure or timeout.
        """
        response = requests.post(
            self.base_url + endpoint,
            headers=self._headers(),
            data=json.dumps(payload),
            # Without a timeout a stalled server would block the run forever.
            timeout=self.timeout,
        )
        # Surface HTTP errors directly instead of a confusing JSON decode error.
        response.raise_for_status()
        return response.json()

    def parse(self, query_text):
        """Send ``query_text`` to the ``parse`` endpoint and return its JSON."""
        return self._post('parse', {
            'queryText': query_text,
            'agentId': self.agentId,
            'chatId': self.chatId,
        })

    def execute(self, query_text, queryId):
        """Send ``query_text`` to the ``execute`` endpoint and return its JSON.

        Args:
            query_text: The question text.
            queryId: Query id taken from a previous ``parse`` response.
        """
        return self._post('execute', {
            'queryText': query_text,
            # NOTE(review): parseId is fixed to 1 — presumably this selects the
            # first candidate parse; confirm against the server API.
            'parseId': 1,
            'chatId': self.chatId,
            'queryId': queryId,
        })

    def read_question_from_csv(self, filePath):
        """Load the CSV at ``filePath`` and return it as a pandas DataFrame."""
        return pd.read_csv(filePath)

    def __get_authorization(self, userName):
        """Return an HS512-signed JWT carrying ``userName`` and an expiry."""
        # Reads the secret via getattr so the method still works on an
        # instance that was not set up through __init__.
        secret = getattr(self, 'secret', self.DEFAULT_SECRET)
        exp = time.time() + 100000000
        token = jwt.encode({"token_userName": userName, "exp": exp}, secret, algorithm="HS512")
        # PyJWT 1.x returns bytes, which cannot be concatenated to 'Bearer '.
        if isinstance(token, bytes):
            token = token.decode('utf-8')
        return token
|
||||||
|
|
||||||
|
|
||||||
|
def benchmark(url:str, agentId:str, chatId:str, filePath:str, userName:str, interval:float=1):
    """Ask every question in a CSV file, one after another.

    For each non-empty value in the ``question`` column the question is
    parsed and then executed through ``BatchTest``. A failure on one question
    is printed with its traceback and the run moves on to the next one.

    Args:
        url: Base URL of the question-answering system.
        agentId: Agent id used for the parse requests.
        chatId: Chat id used for the parse/execute requests.
        filePath: Path of the CSV file; it must have a ``question`` column.
        userName: User name used to sign the login token.
        interval: Seconds to pause after each successfully asked question.

    Raises:
        KeyError: If the CSV file has no ``question`` column.
    """
    batch_test = BatchTest(url, agentId, chatId, userName)
    df = batch_test.read_question_from_csv(filePath)
    # Fail early with a readable message instead of an opaque KeyError
    # raised while reading the first row.
    if 'question' not in df.columns:
        raise KeyError("column 'question' not found in " + str(filePath))

    # dropna() skips empty cells, which pandas loads as NaN.
    for question in df['question'].dropna():
        if not str(question).strip():
            continue
        print('start to ask question:', question)
        # Catch everything here so one bad question does not abort the batch.
        try:
            parse_resp = batch_test.parse(question)
            batch_test.execute(question, parse_resp['data']['queryId'])
        except Exception as e:
            print('error:', e)
            traceback.print_exc()
            continue
        time.sleep(interval)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry point: read the settings, echo them back, and run
    # the batch only after the operator confirms them.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', type=str, required=True, help='url:问答系统url,例如:https://chatdata-dev.test.com')
    parser.add_argument('-a', '--agentId', type=str, required=True, help='agentId:助手ID')
    parser.add_argument('-c', '--chatId', type=str, required=True, help='chatId:会话ID,需要通过浏览器开发者模式获取')
    parser.add_argument('-f', '--filePath', type=str, required=True, help='filePath:问题文件路径, csv格式. 请提前上传到benchmark/data目录下')
    parser.add_argument('-p', '--userName', type=str, required=True, help='userName:用户名,用户获取登录token')
    args = parser.parse_args()

    print('批量测试配置信息[url:', args.url,'agentId:', args.agentId, 'chatId:', args.chatId, 'filePath:', args.filePath, 'userName:', args.userName, ']')
    print('请确认输入的压力测试信息是否正确:')
    print('1. Yes')
    print('2. No')
    try:
        confirm = input()
    except EOFError:
        # Closed stdin (e.g. a non-interactive run) counts as "not confirmed".
        confirm = ''

    # Normalise the answer so surrounding spaces or letter case do not matter.
    if confirm.strip().lower() in ('1', 'yes'):
        benchmark(args.url, args.agentId, args.chatId, args.filePath, args.userName)
    else:
        print('请重新输入压力测试配置信息: url, agentId, chatId, filePath, userName')
|
||||||
3
benchmark/data/caiwu.csv
Normal file
3
benchmark/data/caiwu.csv
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
question
|
||||||
|
每个业务组(BG)的员工人数是多少?
|
||||||
|
每个业务组的损益情况如何?
|
||||||
|
8
benchmark/data/renli.csv
Normal file
8
benchmark/data/renli.csv
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
question
|
||||||
|
在职人员的男女比例是多少?
|
||||||
|
期间入职且离职的人数及其占比如何?
|
||||||
|
期间新入职社招人员的平均年龄是多少?
|
||||||
|
期间入职且在职的人数有多少?
|
||||||
|
期间在职人员的平均年龄是多少?
|
||||||
|
当月的净增长人数及其增长率是多少?
|
||||||
|
期间新入职社招人员的年龄分布情况如何?
|
||||||
|
4
benchmark/data/shuce.csv
Normal file
4
benchmark/data/shuce.csv
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
question
|
||||||
|
在广东省内,哪一个学校的累计集备数最多,请返回该学校的学校名称
|
||||||
|
在广东省内,哪一个学校的累计集体备课数最多,请返回该学校的学校名称
|
||||||
|
|
||||||
|
5
benchmark/requirements.txt
Normal file
5
benchmark/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
pandas==2.0.3
|
||||||
|
PyJWT==2.8.0
|
||||||
|
requests==2.28.2
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user