mirror of
https://github.com/tencentmusic/supersonic.git
synced 2025-12-12 04:27:39 +00:00
Add feature to s2sql that allow few-shots example tied to data model. (#571)
* 1.refactor the retrieval module. 2.refactor the http service module. 3.upgrade text2sql output format the parse for absolute time related expression in query. * fix bug. * upgrade the config module, now support config llm suppoted by langchain. * fix conflicts. * update text2sql config reload to be compitable with new config format. * modify default config. * 1.add self-consistency feature for text2sql. 2.upgrade llm api call from sync to async. 3.refactor text2sql module. 4. refactor semantical retriever modules. * merege with upstream master * add general retrieve service. * add api service for sql_agent for crud opereations of few-shots examples. * modify requirements * add auto-cot feature * 1. output log to a fixed log file. 2.allow few-shots examples tied to data model, and add strategy that extend examples when retrieved examples tied to a data model is not enough. 3. fix misformat in s2ql args. * add prior_ext to output. --------- Co-authored-by: shaoweigong <shaoweigong@tencent.com>
This commit is contained in:
374
chat/core/src/main/python/few_shot_example/sql_examplar.py
Normal file
374
chat/core/src/main/python/few_shot_example/sql_examplar.py
Normal file
@@ -0,0 +1,374 @@
|
||||
examplars= [
|
||||
{ "currentDate":"2020-12-01",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
|
||||
"question":"比较jackjchen和robinlee在内容库的访问次数",
|
||||
"priorSchemaLinks":"""['jackjchen'->用户名, 'robinlee'->用户名]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“比较jackjchen和robinlee在内容库的访问次数“中,我们被问:
|
||||
“比较jackjchen和robinlee”,所以我们需要column=[用户名],cell values = ['jackjchen', 'robinlee'],所以有[用户名:('jackjchen', 'robinlee')]
|
||||
”内容库的访问次数“,所以我们需要column=[访问次数]""",
|
||||
"schemaLinks":"""["用户名":("'jackjchen'", "'robinlee'"), "访问次数"]""",
|
||||
"sql":"""select 用户名, 访问次数 from 内容库产品 where 用户名 in ('jackjchen', 'robinlee')"""
|
||||
},
|
||||
{ "currentDate":"2022-11-06",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
|
||||
"question":"内容库近12个月访问人数 按部门",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库近12个月访问人数 按部门“中,我们被问:
|
||||
”内容库近12个月“,所以我们需要column=[数据日期],cell values = [12],所以有[数据日期:(12)]
|
||||
“访问人数”,所以我们需要column=[访问人数]
|
||||
”按部门“,所以我们需要column=[部门]""",
|
||||
"schemaLinks":"""["数据日期":(12), "访问人数", "部门"]""",
|
||||
"sql":"""select 部门, 数据日期, 访问人数 from 内容库产品 where datediff('month', 数据日期, '2022-11-06') <= 12 """
|
||||
},
|
||||
{ "currentDate":"2023-04-21",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
|
||||
"question":"内容库美术部、技术研发部的访问时长",
|
||||
"priorSchemaLinks":"""['美术部'->部门, '技术研发部'->部门]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库美术部、技术研发部的访问时长“中,我们被问:
|
||||
“访问时长”,所以我们需要column=[访问时长]
|
||||
”内容库美术部、技术研发部“,所以我们需要column=[部门], cell values = ['美术部', '技术研发部'],所以有[部门:('美术部', '技术研发部')]""",
|
||||
"schemaLinks":"""["访问时长", "部门":("'美术部'", "'技术研发部'")]""",
|
||||
"sql":"""select 部门, 访问时长 from 内容库产品 where 部门 in ('美术部', '技术研发部')"""
|
||||
},
|
||||
{ "currentDate":"2023-08-21",
|
||||
"tableName":"严选",
|
||||
"fieldsList":"""["严选版权归属系", "付费模式", "结算播放份额", "付费用户结算播放份额", "数据日期"]""",
|
||||
"question":"近3天海田飞系MPPM结算播放份额",
|
||||
"priorSchemaLinks":"""['海田飞系'->严选版权归属系]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“近3天海田飞系MPPM结算播放份额“中,我们被问:
|
||||
“MPPM结算播放份额”,所以我们需要column=[结算播放份额],
|
||||
”海田飞系“,所以我们需要column=[严选版权归属系], cell values = ['海田飞系'],所以有[严选版权归属系:('海田飞系')],
|
||||
”近3天“,所以我们需要column=[数据日期], cell values = [3],所以有[数据日期:(3)]""",
|
||||
"schemaLinks":"""["结算播放份额", "严选版权归属系":("'海田飞系'"), "数据日期":(3)]""",
|
||||
"sql":"""select 严选版权归属系, 结算播放份额 from 严选 where 严选版权归属系 = '海田飞系' and datediff('day', 数据日期, '2023-08-21') <= 3 """
|
||||
},
|
||||
{ "currentDate":"2023-05-22",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["是否潮流人歌曲", "C音歌曲ID", "C音歌曲MID", "歌曲名", "歌曲版本", "语种", "歌曲类型", "翻唱类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "结算播放量", "运营播放量", "付费用户结算播放量", "历史累计结算播放量", "运营搜播量", "结算搜播量", "运营完播量", "运营推播量", "近7日复播率", "日均搜播量", "数据日期"]""",
|
||||
"question":"对比近7天翻唱版和纯音乐的歌曲播放量",
|
||||
"priorSchemaLinks":"""['纯音乐'->语种, '翻唱版'->歌曲版本]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“对比近3天翻唱版和纯音乐的歌曲播放量“中,我们被问:
|
||||
“歌曲播放量”,所以我们需要column=[结算播放量]
|
||||
”翻唱版“,所以我们需要column=[歌曲版本], cell values = ['翻唱版'],所以有[歌曲版本:('翻唱版')]
|
||||
”和纯音乐的歌曲“,所以我们需要column=[语种], cell values = ['纯音乐'],所以有[语种:('纯音乐')]
|
||||
”近7天“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]""",
|
||||
"schemaLinks":"""["结算播放量", "歌曲版本":("'翻唱版'"), "语种":("'纯音乐'"), "数据日期":(7)]""",
|
||||
"sql":"""select 歌曲版本, 语种, 结算播放量 from 歌曲库 where 歌曲版本 = '翻唱版' and 语种 = '纯音乐' and datediff('day', 数据日期, '2023-05-22') <= 7 """
|
||||
},
|
||||
{ "currentDate":"2023-05-31",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"""["上下架状态", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "活跃区域", "年龄", "歌手才能", "歌手风格", "粉丝数", "潮音粉丝数", "超声波粉丝数", "推博粉丝数", "超声波歌曲数", "在架歌曲数", "超声波分享数", "独占歌曲数", "超声波在架歌曲评论数", "有播放量歌曲数", "数据日期"]""",
|
||||
"question":"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数",
|
||||
"priorSchemaLinks":"""['1527896'->MPPM歌手ID, '1565463'->MPPM歌手ID, '2141459'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数“中,我们被问:
|
||||
“粉丝数”,所以我们需要column=[粉丝数]
|
||||
”陈拙悬、孟梅琦、赖媚韵“,所以我们需要column=[歌手名], cell values = ['陈拙悬', '孟梅琦', '赖媚韵'],所以有[歌手名:('陈拙悬', '孟梅琦', '赖媚韵')]""",
|
||||
"schemaLinks":"""["粉丝数", "歌手名":("'陈拙悬'", "'孟梅琦'", "'赖媚韵'")]""",
|
||||
"sql":"""select 歌手名, 粉丝数 from 艺人库 where 歌手名 in ('陈拙悬', '孟梅琦', '赖媚韵')"""
|
||||
},
|
||||
{ "currentDate":"2023-07-31",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "歌曲版本", "歌曲类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"播放量大于1万的歌曲有多少",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“播放量大于1万的歌曲有多少“中,我们被问:
|
||||
“歌曲有多少”,所以我们需要column=[歌曲名]
|
||||
”播放量大于1万的“,所以我们需要column=[结算播放量], cell values = [10000],所以有[结算播放量:(10000)]""",
|
||||
"schemaLinks":"""["歌曲名", "结算播放量":(10000)]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 结算播放量 > 10000"""
|
||||
},
|
||||
{ "currentDate":"2023-07-31",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||
"question":"内容库访问时长小于1小时,且来自美术部的用户是哪些",
|
||||
"priorSchemaLinks":"""['美术部'->部门]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库访问时长小于1小时,且来自美术部的用户是哪些“中,我们被问:
|
||||
“用户是哪些”,所以我们需要column=[用户名]
|
||||
”美术部的“,所以我们需要column=[部门], cell values = ['美术部'],所以有[部门:('美术部')]
|
||||
”访问时长小于1小时“,所以我们需要column=[访问时长], cell values = [1],所以有[访问时长:(1)]""",
|
||||
"schemaLinks":"""["用户名", "部门":("'美术部'"), "访问时长":(1)]""",
|
||||
"sql":"""select 用户名 from 内容库产品 where 部门 = '美术部' and 访问时长 < 1"""
|
||||
},
|
||||
{ "currentDate":"2023-08-31",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||
"question":"内容库pv最高的用户有哪些",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库pv最高的用户有哪些“中,我们被问:
|
||||
“用户有哪些”,所以我们需要column=[用户名]
|
||||
”pv最高的“,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)]""",
|
||||
"schemaLinks":"""["用户名", "访问次数":(1)]""",
|
||||
"sql":"""select 用户名 from 内容库产品 order by 访问次数 desc limit 1"""
|
||||
},
|
||||
{ "currentDate":"2023-08-31",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"""["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
|
||||
"question":"近90天袁亚伟播放量平均值是多少",
|
||||
"priorSchemaLinks":"""['152789226'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“近90天袁亚伟播放量平均值是多少“中,我们被问:
|
||||
“播放量平均值是多少”,所以我们需要column=[结算播放量]
|
||||
”袁亚伟“,所以我们需要column=[歌手名], cell values = ['袁亚伟'],所以有[歌手名:('袁亚伟')]
|
||||
”近90天“,所以我们需要column=[数据日期], cell values = [90],所以有[数据日期:(90)]""",
|
||||
"schemaLinks":"""["结算播放量", "歌手名":("'袁亚伟'"), "数据日期":(90)]""",
|
||||
"sql":"""select avg(结算播放量) from 艺人库 where 歌手名 = '袁亚伟' and datediff('day', 数据日期, '2023-08-31') <= 90 """
|
||||
},
|
||||
{ "currentDate":"2023-08-31",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"""["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
|
||||
"question":"周倩倩近7天结算播放量总和是多少",
|
||||
"priorSchemaLinks":"""['199509'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“周倩倩近7天结算播放量总和是多少“中,我们被问:
|
||||
“结算播放量总和是多少”,所以我们需要column=[结算播放量]
|
||||
”周倩倩“,所以我们需要column=[歌手名], cell values = ['周倩倩'],所以有[歌手名:('周倩倩')]
|
||||
”近7天“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]""",
|
||||
"schemaLinks":"""["结算播放量", "歌手名":("'周倩倩'"), "数据日期":(7)]""",
|
||||
"sql":"""select sum(结算播放量) from 艺人库 where 歌手名 = '周倩倩' and datediff('day', 数据日期, '2023-08-31') <= 7 """
|
||||
},
|
||||
{ "currentDate":"2023-09-14",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["部门", "模块", "用户名", "访问次数", "访问人数", "访问时长", "数据日期"]""",
|
||||
"question":"内容库访问次数大于1k的部门是哪些",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库访问次数大于1k的部门是哪些“中,我们被问:
|
||||
“部门是哪些”,所以我们需要column=[部门]
|
||||
”访问次数大于1k的“,所以我们需要column=[访问次数], cell values = [1000],所以有[访问次数:(1000)]""",
|
||||
"schemaLinks":"""["部门", "访问次数":(1000)]""",
|
||||
"sql":"""select 部门 from 内容库产品 where 访问次数 > 1000"""
|
||||
},
|
||||
{ "currentDate":"2023-09-18",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "MPPM歌手ID", "歌曲版本", "歌曲类型", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"陈亿训唱的所有的播放量大于20k的孤勇者有哪些",
|
||||
"priorSchemaLinks":"""['199509'->MPPM歌手ID, '1527123'->MPPM歌曲ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“陈亿训唱的所有的播放量大于20k的孤勇者有哪些“中,我们被问:
|
||||
“孤勇者有哪些”,所以我们需要column=[歌曲名], cell values = ['孤勇者'],所以有[歌曲名:('孤勇者')]
|
||||
”播放量大于20k的“,所以我们需要column=[结算播放量], cell values = [20000],所以有[结算播放量:(20000)]
|
||||
”陈亿训唱的“,所以我们需要column=[歌手名], cell values = ['陈亿训'],所以有[歌手名:('陈亿训')]""",
|
||||
"schemaLinks":"""["歌曲名":("'孤勇者'"), "结算播放量":(20000), "歌手名":("'陈亿训'")]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 结算播放量 > 20000 and 歌手名 = '陈亿训' and 歌曲名 = '孤勇者'"""
|
||||
},
|
||||
{ "currentDate":"2023-09-18",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"周洁轮去年发布的歌曲有哪些",
|
||||
"priorSchemaLinks":"""['23109'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“周洁轮去年发布的歌曲有哪些“中,我们被问:
|
||||
“歌曲有哪些”,所以我们需要column=[歌曲名]
|
||||
”去年发布的“,所以我们需要column=[发布时间], cell values = [1],所以有[发布时间:(1)]
|
||||
”周洁轮“,所以我们需要column=[歌手名], cell values = ['周洁轮'],所以有[歌手名:('周洁轮')]""",
|
||||
"schemaLinks":"""["歌曲名", "发布时间":(1), "歌手名":("'周洁轮'")]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where datediff('year', 发布时间, '2023-09-18') <= 1 and 歌手名 = '周洁轮'"""
|
||||
},
|
||||
{ "currentDate":"2023-09-11",
|
||||
"tableName":"艺人库",
|
||||
"fieldsList":"""["播放量层级", "播放量单调性", "播放量方差", "播放量突增类型", "播放量集中度", "歌手名", "歌手等级", "歌手类型", "歌手来源", "签约日期", "MPPM潮流人等级", "结算播放量", "运营播放量", "历史累计结算播放量", "有播放量歌曲数", "历史累计运营播放量", "付费用户结算播放量", "结算播放量占比", "运营播放份额", "免费用户结算播放占比", "完播量", "数据日期"]""",
|
||||
"question":"我想要近半年签约的播放量前十的歌手有哪些",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“我想要近半年签约的播放量前十的歌手“中,我们被问:
|
||||
“歌手有哪些”,所以我们需要column=[歌手名]
|
||||
”播放量前十的“,所以我们需要column=[结算播放量], cell values = [10],所以有[结算播放量:(10)]
|
||||
”近半年签约的“,所以我们需要column=[签约日期], cell values = [0.5],所以有[签约日期:(0.5)]""",
|
||||
"schemaLinks":"""["歌手名", "结算播放量":(10), "签约日期":(0.5)]""",
|
||||
"sql":"""select 歌手名 from 艺人库 where datediff('year', 签约日期, '2023-09-11') <= 0.5 order by 结算播放量 desc limit 10"""
|
||||
},
|
||||
{ "currentDate":"2023-08-12",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
||||
"question":"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“最近一年发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
||||
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
||||
”最近一年发行的“,所以我们需要column=[发行日期], cell values = [1],所以有[发行日期:(1)]
|
||||
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]""",
|
||||
"schemaLinks":"""["歌曲名", "发行日期":(1), "数据日期":(7), "结算播放量":(10000000)]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 1 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"""
|
||||
},
|
||||
{ "currentDate":"2023-08-12",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
||||
"question":"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“今年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
||||
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
||||
”今年以来发行的“,所以我们需要column=[发行日期], cell values = [0],所以有[发行日期:(0)]
|
||||
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]""",
|
||||
"schemaLinks":"""["歌曲名", "发行日期":(0), "数据日期":(7), "结算播放量":(10000000)]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 0 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"""
|
||||
},
|
||||
{ "currentDate":"2023-08-12",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList": """["发行日期", "歌曲语言", "歌曲来源", "歌曲流派", "歌曲名", "歌曲版本", "歌曲类型", "发行时间", "数据日期"]""",
|
||||
"question":"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"priorSchemaLinks":"""['514129144'->MPPM歌曲ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的“中,我们被问:
|
||||
“发行的歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
||||
”2023年以来发行的“,所以我们需要column=[发行日期], cell values = ['2023-01-01'],所以有[发行日期:('2023-01-01')]
|
||||
”在近7天播放超过一千万的“,所以我们需要column=[数据日期, 结算播放量], cell values = [7, 10000000],所以有[数据日期:(7), 结算播放量:(10000000)]""",
|
||||
"schemaLinks":"""["歌曲名", "发行日期":("'2023-01-01'"), "数据日期":(7), "结算播放量":(10000000)]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 发行日期 >= '2023-01-01' and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000"""
|
||||
},
|
||||
{ "currentDate":"2023-08-01",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"周洁轮2023年6月之后发布的歌曲有哪些",
|
||||
"priorSchemaLinks":"""['23109'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“周洁轮2023年6月之后发布的歌曲有哪些“中,我们被问:
|
||||
“歌曲有哪些”,所以我们需要column=[歌曲名]
|
||||
”2023年6月之后发布的“,所以我们需要column=[发布时间], cell values = ['2023-06-01'],所以有[发布时间:('2023-06-01')]
|
||||
”周洁轮“,所以我们需要column=[歌手名], cell values = ['周洁轮'],所以有[歌手名:('周洁轮')]""",
|
||||
"schemaLinks":"""["歌曲名", "发布时间":("'2023-06-01'"), "歌手名":("'周洁轮'")]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 发布时间 >= '2023-06-01' and 歌手名 = '周洁轮'"""
|
||||
},
|
||||
{ "currentDate":"2023-08-01",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?",
|
||||
"priorSchemaLinks":"""['2312311'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?“中,我们被问:
|
||||
“歌曲中,有哪些”,所以我们需要column=[歌曲名]
|
||||
“播放量大于500W的”,所以我们需要column=[结算播放量], cell values = [5000000],所以有[结算播放量:(5000000)]
|
||||
”邓梓琦在2023年1月5日之后发布的“,所以我们需要column=[发布时间], cell values = ['2023-01-05'],所以有[发布时间:('2023-01-05')]
|
||||
”邓梓琦“,所以我们需要column=[歌手名], cell values = ['邓梓琦'],所以有[歌手名:('邓梓琦')]""",
|
||||
"schemaLinks":"""["歌曲名", "结算播放量":(5000000), "发布时间":("'2023-01-05'"), "歌手名":("'邓梓琦'")]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 发布时间 >= '2023-01-05' and 歌手名 = '邓梓琦' and 结算播放量 > 5000000"""
|
||||
},
|
||||
{ "currentDate":"2023-09-17",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"2023年6月以后,张亮英播放量大于200万的歌曲有哪些?",
|
||||
"priorSchemaLinks":"""['45453'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“2023年6月以后,张亮英播放量大于200万的歌曲有哪些?“中,我们被问:
|
||||
“播放量大于200万的”,所以我们需要column=[结算播放量], cell values = [2000000],所以有[结算播放量:(2000000)]
|
||||
”2023年6月以后,张亮英“,所以我们需要column=[数据日期, 歌手名], cell values = ['2023-06-01', '张亮英'],所以有[数据日期:('2023-06-01'), 歌手名:('张亮英')],
|
||||
”歌曲有哪些“,所以我们需要column=[歌曲名]""",
|
||||
"schemaLinks":"""["结算播放量":(2000000), "数据日期":("'2023-06-01'"), "歌手名":("'张亮英'"), "歌曲名"]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 数据日期 >= '2023-06-01' and 歌手名 = '张亮英' and 结算播放量 > 2000000"""
|
||||
},
|
||||
{ "currentDate":"2023-08-16",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些",
|
||||
"priorSchemaLinks":"""['23109'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些“中,我们被问:
|
||||
“播放量大于20万的”,所以我们需要column=[结算播放量], cell values = [200000],所以有[结算播放量:(200000)]
|
||||
”2021年6月以后发布的“,所以我们需要column=[发布时间], cell values = ['2021-06-01'],所以有[发布时间:('2021-06-01')]
|
||||
”李雨纯“,所以我们需要column=[歌手名], cell values = ['李雨纯'],所以有[歌手名:('李雨纯')]""",
|
||||
"schemaLinks":"""["结算播放量":(200000), "发布时间":("'2021-06-01'"), "歌手名":("'李雨纯'")]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 发布时间 >= '2021-06-01' and 歌手名 = '李雨纯' and 结算播放量 > 200000"""
|
||||
},
|
||||
{ "currentDate":"2023-08-16",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲名", "歌曲版本", "歌手名", "歌曲类型", "发布时间", "MPPM歌曲ID", "是否严选窄口径歌曲", "是否严选宽口径歌曲", "是否潮流人歌曲", "超声波歌曲ID", "C音歌曲ID", "C音歌曲MID", "结算播放量", "运营播放量", "分享量", "收藏量", "运营搜播量", "结算搜播量", "拉新用户数", "拉活用户数", "分享率", "结算播放份额", "数据日期"]""",
|
||||
"question":"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些",
|
||||
"priorSchemaLinks":"""['4234234'->MPPM歌手ID]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些“中,我们被问:
|
||||
“播放量大于20万的”,所以我们需要column=[结算播放量], cell values = [200000],所以有[结算播放量:(200000)]
|
||||
”1992年4月2日到2020年5月2日之间发布的“, 所以我们需要column=[发布时间], cell values = ['1992-04-02', '2020-05-02'],所以有[发布时间:('1992-04-02', '2020-05-02')]
|
||||
”刘锝桦“,所以我们需要column=[歌手名], cell values = ['刘锝桦'],所以有[歌手名:('刘锝桦')]""",
|
||||
"schemaLinks":"""["结算播放量":(200000), "发布时间":("'1992-04-02'", "'2020-05-02'"), "歌手名":("'刘锝桦'")]""",
|
||||
"sql":"""select 歌曲名 from 歌曲库 where 发布时间 >= '1992-04-02' and 发布时间 <= '2020-05-02' and 歌手名 = '刘锝桦' and 结算播放量 > 200000"""
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-04",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||
"question":"内容库近30天访问次数的平均数",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库近30天访问次数的平均数“中,我们被问:
|
||||
“访问次数的平均数”,所以我们需要column=[访问次数]
|
||||
”内容库近30天“,所以我们需要column=[数据日期], cell values = [30],所以有[数据日期:(30)]""",
|
||||
"schemaLinks":"""["访问次数", "数据日期":(30)]""",
|
||||
"sql":"""select avg(访问次数) from 内容库产品 where datediff('day', 数据日期, '2023-09-04') <= 30 """
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-04",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||
"question":"内容库近半年哪个月的访问次数汇总最高",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库近半年哪个月的访问次数汇总最高“中,我们被问:
|
||||
“访问次数汇总最高”,所以我们需要column=[访问次数], cell values = [1],所以有[访问次数:(1)]
|
||||
”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]""",
|
||||
"schemaLinks":"""["访问次数":(1), "数据日期":(0.5)]""",
|
||||
"sql":"""select MONTH(数据日期), sum(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) order by sum(访问次数) desc limit 1"""
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-04",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||
"question":"内容库近半年每个月的平均访问次数",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库近半年每个月的平均访问次数“中,我们被问:
|
||||
“每个月的平均访问次数”,所以我们需要column=[访问次数]
|
||||
”内容库近半年“,所以我们需要column=[数据日期], cell values = [0.5],所以有[数据日期:(0.5)]""",
|
||||
"schemaLinks":"""["访问次数", "数据日期":(0.5)]""",
|
||||
"sql":"""select MONTH(数据日期), avg(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期)"""
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||
"question":"内容库 按部门统计访问次数 top10 的部门",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“内容库 按部门统计访问次数 top10 的部门“中,我们被问:
|
||||
“访问次数 top10 的部门”,所以我们需要column=[访问次数], cell values = [10],所以有[访问次数:(10)]
|
||||
”内容库 按部门统计“,所以我们需要column=[部门]""",
|
||||
"schemaLinks":"""["访问次数":(10), "部门"]""",
|
||||
"sql":"""select 部门, sum(访问次数) from 内容库产品 group by 部门 order by sum(访问次数) desc limit 10"""
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"内容库产品",
|
||||
"fieldsList":"""["用户名", "部门", "模块", "访问时长", "访问次数", "访问人数", "数据日期"]""",
|
||||
"question":"超音速 近7个月,月度总访问量超过 2万的月份",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“超音速 近7个月,月度总访问量超过 2万的月份“中,我们被问:
|
||||
“月度总访问量超过 2万的月份”,所以我们需要column=[访问次数], cell values = [20000],所以有[访问次数:(20000)]
|
||||
”超音速 近7个月“,所以我们需要column=[数据日期], cell values = [7],所以有[数据日期:(7)]""",
|
||||
"schemaLinks":"""["访问次数":(20000), "数据日期":(7)]""",
|
||||
"sql":"""select MONTH(数据日期) from 内容库产品 where datediff('day', 数据日期, '2023-09-10') <= 7 group by MONTH(数据日期) having sum(访问次数) > 20000"""
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲语言", "歌曲来源", "运营播放量", "播放量", "歌曲名", "结算播放量", "专辑名", "发布日期", "歌曲版本", "歌曲类型", "数据日期"]""",
|
||||
"question":"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量“中,我们被问:
|
||||
“按月粒度来统计近1年的运营播放量”,所以我们需要column=[运营播放量, 数据日期], cell values = [1],所以有[运营播放量, 数据日期:(1)]
|
||||
”按播放量取top 100“,所以我们需要column=[播放量], cell values = [100],所以有[播放量:(100)]
|
||||
“2022年7月到2023年7月之间发布到歌曲”,所以我们需要column=[发布日期], cell values = ['2022-07-01', '2023-07-01'],所以有[发布日期:('2022-07-01', '2023-07-01')]""",
|
||||
"schemaLinks":"""["运营播放量", "数据日期":(1), "播放量":(100), "发布日期":("'2022-07-01'", "'2023-07-01'")]""",
|
||||
"sql":"""select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期)"""
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-09-10",
|
||||
"tableName":"歌曲库",
|
||||
"fieldsList":"""["歌曲语言", "歌曲来源", "运营播放量", "播放量", "歌曲名", "结算播放量", "专辑名", "发布日期", "歌曲版本", "歌曲类型", "数据日期"]""",
|
||||
"question":"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份",
|
||||
"priorSchemaLinks":"""[]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份“中,我们被问:
|
||||
“筛选出其中运营播放量之和大于2k的月份”,所以我们需要column=[运营播放量], cell values = [2000],所以有[运营播放量:(2000)]
|
||||
”按月粒度来统计近1年的运营播放量之和“,所以我们需要column=[数据日期], cell values = [1],所以有[数据日期:(1)]
|
||||
”按播放量取top100“,所以我们需要column=[播放量], cell values = [100],所以有[播放量:(100)]
|
||||
”2022年7月到2023年7月之间发布到歌曲“,所以我们需要column=[发布日期], cell values = ['2022-07-01', '2023-07-01'],所以有[发布日期:('2022-07-01', '2023-07-01')]""",
|
||||
"schemaLinks":"""["运营播放量":(2000), "数据日期":(1), "播放量":(100), "发布日期":("'2022-07-01'", "'2023-07-01'")]""",
|
||||
"sql":"""select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期) having sum(运营播放量) > 2000"""
|
||||
},
|
||||
{
|
||||
"currentDate":"2023-11-01",
|
||||
"tableName":"营销月模型",
|
||||
"fieldsList":"""["国家中文名", "机型类别", "销量", "数据日期"]""",
|
||||
"question":"今年智能机在哪个国家的销量之和最高",
|
||||
"priorSchemaLinks":"""['智能机'->机型类别]""",
|
||||
"analysis": """让我们一步一步地思考。在问题“今年智能机在哪个国家的销量之和最高“中,我们被问:
|
||||
“销量最高”,所以我们需要column=[销量], cell values = [1],所以有[销量:(1)]
|
||||
”今年“,所以我们需要column=[数据日期], cell values = ['2023-01-01', '2023-11-01'],所以有[数据日期:('2023-01-01', '2023-11-01')]
|
||||
”智能机“,所以我们需要column=[机型类别], cell values = ['智能机'],所以有[机型类别:('智能机')]""",
|
||||
"schemaLinks":"""["销量":(1), "数据日期":("'2023-01-01'", "'2023-11-01'"), "机型类别":("'智能机'")]""",
|
||||
"sql":"""select 国家中文名, sum(销量) from 营销月模型 where 机型类别 = '智能机' and 数据日期 >= '2023-01-01' and 数据日期 <= '2023-11-01' group by 国家中文名 order by sum(销量) desc limit 1"""
|
||||
}
|
||||
]
|
||||
@@ -8,8 +8,6 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from instances.logging_instance import logger
|
||||
|
||||
|
||||
def type_convert(input_str: str):
|
||||
try:
|
||||
@@ -27,6 +25,11 @@ config_path = os.path.join(CONFIG_DIR_PATH, config_file)
|
||||
config = configparser.ConfigParser()
|
||||
config.read(config_path)
|
||||
|
||||
log_dir = "log"
|
||||
LOG_DIR_PATH = os.path.join(PROJECT_DIR_PATH, log_dir)
|
||||
log_file = "run.log"
|
||||
LOG_FILE_PATH = os.path.join(LOG_DIR_PATH, log_file)
|
||||
|
||||
llm_parser_section_name = "LLMParser"
|
||||
LLMPARSER_HOST = config.get(llm_parser_section_name, 'LLMPARSER_HOST')
|
||||
LLMPARSER_PORT = int(config.get(llm_parser_section_name, 'LLMPARSER_PORT'))
|
||||
@@ -57,21 +60,22 @@ for option in config.options(llm_model_section_name):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
logger.info(f"PROJECT_DIR_PATH: {PROJECT_DIR_PATH}")
|
||||
logger.info(f"EMB_MODEL_PATH: {HF_TEXT2VEC_MODEL_NAME}")
|
||||
logger.info(f"CHROMA_DB_PERSIST_PATH: {CHROMA_DB_PERSIST_PATH}")
|
||||
logger.info(f"LLMPARSER_HOST: {LLMPARSER_HOST}")
|
||||
logger.info(f"LLMPARSER_PORT: {LLMPARSER_PORT}")
|
||||
logger.info(f"llm_config_dict: {llm_config_dict}")
|
||||
logger.info(f"LLM_PROVIDER_NAME: {LLM_PROVIDER_NAME}")
|
||||
logger.info(f"PRESET_QUERY_COLLECTION_NAME: {PRESET_QUERY_COLLECTION_NAME}")
|
||||
logger.info(f"SOLVED_QUERY_COLLECTION_NAME: {SOLVED_QUERY_COLLECTION_NAME}")
|
||||
logger.info(f"TEXT2DSLAGENT_COLLECTION_NAME: {TEXT2DSLAGENT_COLLECTION_NAME}")
|
||||
logger.info(f"TEXT2DSLAGENTACT_COLLECTION_NAME: {TEXT2DSLAGENTACT_COLLECTION_NAME}")
|
||||
logger.info(f"TEXT2DSL_EXAMPLE_NUM: {TEXT2DSL_EXAMPLE_NUM}")
|
||||
logger.info(f"TEXT2DSL_FEWSHOTS_NUM: {TEXT2DSL_FEWSHOTS_NUM}")
|
||||
logger.info(f"TEXT2DSL_SELF_CONSISTENCY_NUM: {TEXT2DSL_SELF_CONSISTENCY_NUM}")
|
||||
logger.info(f"ACT_MIN_WINDOWN_SIZE: {ACT_MIN_WINDOWN_SIZE}")
|
||||
logger.info(f"ACT_MAX_WINDOWN_SIZE: {ACT_MAX_WINDOWN_SIZE}")
|
||||
print(f"PROJECT_DIR_PATH: {PROJECT_DIR_PATH}")
|
||||
print(f"EMB_MODEL_PATH: {HF_TEXT2VEC_MODEL_NAME}")
|
||||
print(f"CHROMA_DB_PERSIST_PATH: {CHROMA_DB_PERSIST_PATH}")
|
||||
print(f"LLMPARSER_HOST: {LLMPARSER_HOST}")
|
||||
print(f"LLMPARSER_PORT: {LLMPARSER_PORT}")
|
||||
print(f"llm_config_dict: {llm_config_dict}")
|
||||
print(f"LLM_PROVIDER_NAME: {LLM_PROVIDER_NAME}")
|
||||
print(f"PRESET_QUERY_COLLECTION_NAME: {PRESET_QUERY_COLLECTION_NAME}")
|
||||
print(f"SOLVED_QUERY_COLLECTION_NAME: {SOLVED_QUERY_COLLECTION_NAME}")
|
||||
print(f"TEXT2DSLAGENT_COLLECTION_NAME: {TEXT2DSLAGENT_COLLECTION_NAME}")
|
||||
print(f"TEXT2DSLAGENTACT_COLLECTION_NAME: {TEXT2DSLAGENTACT_COLLECTION_NAME}")
|
||||
print(f"TEXT2DSL_EXAMPLE_NUM: {TEXT2DSL_EXAMPLE_NUM}")
|
||||
print(f"TEXT2DSL_FEWSHOTS_NUM: {TEXT2DSL_FEWSHOTS_NUM}")
|
||||
print(f"TEXT2DSL_SELF_CONSISTENCY_NUM: {TEXT2DSL_SELF_CONSISTENCY_NUM}")
|
||||
print(f"ACT_MIN_WINDOWN_SIZE: {ACT_MIN_WINDOWN_SIZE}")
|
||||
print(f"ACT_MAX_WINDOWN_SIZE: {ACT_MAX_WINDOWN_SIZE}")
|
||||
print(f"LOG_FILE_PATH: {LOG_FILE_PATH}")
|
||||
|
||||
|
||||
@@ -2,22 +2,25 @@
|
||||
{
|
||||
"question": "比较jackjchen和robinlee在内容库的访问次数",
|
||||
"questionAugmented": "比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 用户名, 访问次数 from 内容库产品 where 用户名 in ('jackjchen', 'robinlee')",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )\", we are asked:\n\"’用户名‘,\" so we need column = [用户名]\n\"的访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [用户名,访问次数,jackjchen,robinlee]",
|
||||
"generatedSchemaLinkings": "[用户名,访问次数,jackjchen,robinlee]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"比较jackjchen和robinlee在内容库的访问次数 (补充信息:’'jackjchen'‘是一个’用户名‘,’ 'robinlee'‘是一个’用户名‘。当前的日期是2020-12-01) (备注: )\", we are asked:\n\"的访问次数 \" so we need column = [访问次数]\n\"’用户名‘,\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [jackjchen,robinlee]. So the Schema_links are:\nSchema_links: [访问次数,用户名,jackjchen,robinlee]",
|
||||
"generatedSchemaLinkings": "[访问次数,用户名,jackjchen,robinlee]"
|
||||
},
|
||||
{
|
||||
"question": "内容库近12个月访问人数 按部门",
|
||||
"questionAugmented": "内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 部门, 数据日期, 访问人数 from 内容库产品 where datediff('month', 数据日期, '2022-11-06') <= 12 ",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )\", we are asked:\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [部门,访问人数,数据日期,12,month]",
|
||||
"generatedSchemaLinkings": "[部门,访问人数,数据日期,12,month]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近12个月访问人数 按部门 (补充信息:。当前的日期是2022-11-06) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\" 按部门 (\" so we need column = [部门]\n\"访问人数 按\" so we need column = [访问人数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [12,month]. So the Schema_links are:\nSchema_links: [数据日期,部门,访问人数,12,month]",
|
||||
"generatedSchemaLinkings": "[数据日期,部门,访问人数,12,month]"
|
||||
},
|
||||
{
|
||||
"question": "内容库美术部、技术研发部的访问时长",
|
||||
"questionAugmented": "内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 部门, 访问时长 from 内容库产品 where 部门 in ('美术部', '技术研发部')",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库美术部、技术研发部的访问时长 (补充信息:’'美术部'‘是一个’部门‘,’ '技术研发部'‘是一个’部门‘。当前的日期是2023-04-21) (备注: )\", we are asked:\n\"部门‘,’ \" so we need column = [部门]\n\"的访问时长 \" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,技术研发部]. So the Schema_links are:\nSchema_links: [部门,访问时长,美术部,技术研发部]",
|
||||
@@ -26,22 +29,25 @@
|
||||
{
|
||||
"question": "近3天海田飞系MPPM结算播放份额",
|
||||
"questionAugmented": "近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )",
|
||||
"modelName": "严选",
|
||||
"dbSchema": "Table: 严选, Columns = [\"严选版权归属系\", \"付费模式\", \"结算播放份额\", \"付费用户结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 严选版权归属系, 结算播放份额 from 严选 where 严选版权归属系 = '海田飞系' and datediff('day', 数据日期, '2023-08-21') <= 3 ",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )\", we are asked:\n\"结算播放份额 \" so we need column = [结算播放份额]\n\"严选版权归属系\" so we need column = [严选版权归属系]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [海田飞系,3,day]. So the Schema_links are:\nSchema_links: [结算播放份额,严选版权归属系,数据日期,海田飞系,3,day]",
|
||||
"generatedSchemaLinkings": "[结算播放份额,严选版权归属系,数据日期,海田飞系,3,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近3天海田飞系MPPM结算播放份额 (补充信息:’'海田飞系'‘是一个’严选版权归属系‘。当前的日期是2023-08-21) (备注: )\", we are asked:\n\"结算播放份额 \" so we need column = [结算播放份额]\n\"当前的日期是\" so we need column = [数据日期]\n\"严选版权归属系\" so we need column = [严选版权归属系]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [海田飞系,3,day]. So the Schema_links are:\nSchema_links: [结算播放份额,数据日期,严选版权归属系,海田飞系,3,day]",
|
||||
"generatedSchemaLinkings": "[结算播放份额,数据日期,严选版权归属系,海田飞系,3,day]"
|
||||
},
|
||||
{
|
||||
"question": "对比近7天翻唱版和纯音乐的歌曲播放量",
|
||||
"questionAugmented": "对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"是否潮流人歌曲\", \"C音歌曲ID\", \"C音歌曲MID\", \"歌曲名\", \"歌曲版本\", \"语种\", \"歌曲类型\", \"翻唱类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"结算播放量\", \"运营播放量\", \"付费用户结算播放量\", \"历史累计结算播放量\", \"运营搜播量\", \"结算搜播量\", \"运营完播量\", \"运营推播量\", \"近7日复播率\", \"日均搜播量\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲版本, 语种, 结算播放量 from 歌曲库 where 歌曲版本 = '翻唱版' and 语种 = '纯音乐' and datediff('day', 数据日期, '2023-05-22') <= 7 ",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )\", we are asked:\n\"曲播放量 (\" so we need column = [结算播放量]\n\"’歌曲版本‘\" so we need column = [歌曲版本]\n\"语种‘,’ \" so we need column = [语种]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,翻唱版,纯音乐,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌曲版本,语种,数据日期,7,翻唱版,纯音乐,day]",
|
||||
"generatedSchemaLinkings": "[结算播放量,歌曲版本,语种,数据日期,7,翻唱版,纯音乐,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比近7天翻唱版和纯音乐的歌曲播放量 (补充信息:’'纯音乐'‘是一个’语种‘,’ '翻唱版'‘是一个’歌曲版本‘。当前的日期是2023-05-22) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"’歌曲版本‘\" so we need column = [歌曲版本]\n\"语种‘,’ \" so we need column = [语种]\n\"曲播放量 (\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,翻唱版,纯音乐,day]. So the Schema_links are:\nSchema_links: [数据日期,歌曲版本,语种,结算播放量,7,翻唱版,纯音乐,day]",
|
||||
"generatedSchemaLinkings": "[数据日期,歌曲版本,语种,结算播放量,7,翻唱版,纯音乐,day]"
|
||||
},
|
||||
{
|
||||
"question": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数",
|
||||
"questionAugmented": "对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )",
|
||||
"modelName": "艺人库",
|
||||
"dbSchema": "Table: 艺人库, Columns = [\"上下架状态\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"活跃区域\", \"年龄\", \"歌手才能\", \"歌手风格\", \"粉丝数\", \"潮音粉丝数\", \"超声波粉丝数\", \"推博粉丝数\", \"超声波歌曲数\", \"在架歌曲数\", \"超声波分享数\", \"独占歌曲数\", \"超声波在架歌曲评论数\", \"有播放量歌曲数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌手名, 粉丝数 from 艺人库 where 歌手名 in ('陈拙悬', '孟梅琦', '赖媚韵')",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"对比一下陈拙悬、孟梅琦、赖媚韵的粉丝数 (补充信息:’'1527896'‘是一个’MPPM歌手ID‘,’ '1565463'‘是一个’MPPM歌手ID‘,’ '2141459'‘是一个’MPPM歌手ID‘。当前的日期是2023-05-31) (备注: )\", we are asked:\n\"歌手ID‘,\" so we need column = [歌手名]\n\"的粉丝数 (\" so we need column = [粉丝数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [陈拙悬,孟梅琦,赖媚韵]. So the Schema_links are:\nSchema_links: [歌手名,粉丝数,陈拙悬,孟梅琦,赖媚韵]",
|
||||
@@ -50,6 +56,7 @@
|
||||
{
|
||||
"question": "播放量大于1万的歌曲有多少",
|
||||
"questionAugmented": "播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 结算播放量 > 10000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"播放量大于1万的歌曲有多少 (补充信息:。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"歌曲有多少 \" so we need column = [歌曲名]\n\"播放量大于1\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,10000]",
|
||||
@@ -58,46 +65,52 @@
|
||||
{
|
||||
"question": "内容库访问时长小于1小时,且来自美术部的用户是哪些",
|
||||
"questionAugmented": "内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 用户名 from 内容库产品 where 部门 = '美术部' and 访问时长 < 1",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"术部的用户是\" so we need column = [用户名]\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [用户名,部门,访问时长,美术部,1]",
|
||||
"generatedSchemaLinkings": "[用户名,部门,访问时长,美术部,1]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问时长小于1小时,且来自美术部的用户是哪些 (补充信息:’'美术部'‘是一个’部门‘。当前的日期是2023-07-31) (备注: )\", we are asked:\n\"一个’部门‘\" so we need column = [部门]\n\"访问时长小于\" so we need column = [访问时长]\n\"术部的用户是\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [美术部,1]. So the Schema_links are:\nSchema_links: [部门,访问时长,用户名,美术部,1]",
|
||||
"generatedSchemaLinkings": "[部门,访问时长,用户名,美术部,1]"
|
||||
},
|
||||
{
|
||||
"question": "内容库pv最高的用户有哪些",
|
||||
"questionAugmented": "内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 用户名 from 内容库产品 order by 访问次数 desc limit 1",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"用户有哪些 (\" so we need column = [用户名]\n\"最高的用户有\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [用户名,访问次数,1]",
|
||||
"generatedSchemaLinkings": "[用户名,访问次数,1]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库pv最高的用户有哪些 (补充信息:。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"最高的用户有\" so we need column = [访问次数]\n\"用户有哪些 (\" so we need column = [用户名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1]. So the Schema_links are:\nSchema_links: [访问次数,用户名,1]",
|
||||
"generatedSchemaLinkings": "[访问次数,用户名,1]"
|
||||
},
|
||||
{
|
||||
"question": "近90天袁亚伟播放量平均值是多少",
|
||||
"questionAugmented": "近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )",
|
||||
"modelName": "艺人库",
|
||||
"dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select avg(结算播放量) from 艺人库 where 歌手名 = '袁亚伟' and datediff('day', 数据日期, '2023-08-31') <= 90 ",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"播放量平均值\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [袁亚伟,90,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌手名,数据日期,袁亚伟,90,day]",
|
||||
"generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,袁亚伟,90,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"近90天袁亚伟播放量平均值是多少 (补充信息:’'152789226'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\n\"播放量平均值\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [袁亚伟,90,day]. So the Schema_links are:\nSchema_links: [歌手名,数据日期,结算播放量,袁亚伟,90,day]",
|
||||
"generatedSchemaLinkings": "[歌手名,数据日期,结算播放量,袁亚伟,90,day]"
|
||||
},
|
||||
{
|
||||
"question": "周倩倩近7天结算播放量总和是多少",
|
||||
"questionAugmented": "周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )",
|
||||
"modelName": "艺人库",
|
||||
"dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select sum(结算播放量) from 艺人库 where 歌手名 = '周倩倩' and datediff('day', 数据日期, '2023-08-31') <= 7 ",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"结算播放量总\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [周倩倩,7,day]. So the Schema_links are:\nSchema_links: [结算播放量,歌手名,数据日期,周倩倩,7,day]",
|
||||
"generatedSchemaLinkings": "[结算播放量,歌手名,数据日期,周倩倩,7,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周倩倩近7天结算播放量总和是多少 (补充信息:’'199509'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-31) (备注: )\", we are asked:\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\n\"结算播放量总\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [周倩倩,7,day]. So the Schema_links are:\nSchema_links: [歌手名,数据日期,结算播放量,周倩倩,7,day]",
|
||||
"generatedSchemaLinkings": "[歌手名,数据日期,结算播放量,周倩倩,7,day]"
|
||||
},
|
||||
{
|
||||
"question": "内容库访问次数大于1k的部门是哪些",
|
||||
"questionAugmented": "内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"部门\", \"模块\", \"用户名\", \"访问次数\", \"访问人数\", \"访问时长\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 部门 from 内容库产品 where 访问次数 > 1000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )\", we are asked:\n\"访问次数大于\" so we need column = [访问次数]\n\"部门是哪些 \" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [访问次数,部门,1000]",
|
||||
"generatedSchemaLinkings": "[访问次数,部门,1000]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库访问次数大于1k的部门是哪些 (补充信息:。当前的日期是2023-09-14) (备注: )\", we are asked:\n\"部门是哪些 \" so we need column = [部门]\n\"访问次数大于\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1000]. So the Schema_links are:\nSchema_links: [部门,访问次数,1000]",
|
||||
"generatedSchemaLinkings": "[部门,访问次数,1000]"
|
||||
},
|
||||
{
|
||||
"question": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些",
|
||||
"questionAugmented": "陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"MPPM歌手ID\", \"歌曲版本\", \"歌曲类型\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 结算播放量 > 20000 and 歌手名 = '陈亿训' and 歌曲名 = '孤勇者'",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"陈亿训唱的所有的播放量大于20k的孤勇者有哪些 (补充信息:’'199509'‘是一个’MPPM歌手ID‘,’ '1527123'‘是一个’MPPM歌曲ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲ID‘。\" so we need column = [歌曲名]\n\"的所有的播放量\" so we need column = [结算播放量]\n\"歌手ID‘,\" so we need column = [歌手名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [孤勇者,20000,陈亿训]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,孤勇者,20000,陈亿训]",
|
||||
@@ -106,6 +119,7 @@
|
||||
{
|
||||
"question": "周洁轮去年发布的歌曲有哪些",
|
||||
"questionAugmented": "周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where datediff('year', 发布时间, '2023-09-18') <= 1 and 歌手名 = '周洁轮'",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮去年发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-18) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"发布的歌曲有\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,周洁轮,year]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,1,周洁轮,year]",
|
||||
@@ -114,38 +128,43 @@
|
||||
{
|
||||
"question": "我想要近半年签约的播放量前十的歌手有哪些",
|
||||
"questionAugmented": "我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )",
|
||||
"modelName": "艺人库",
|
||||
"dbSchema": "Table: 艺人库, Columns = [\"播放量层级\", \"播放量单调性\", \"播放量方差\", \"播放量突增类型\", \"播放量集中度\", \"歌手名\", \"歌手等级\", \"歌手类型\", \"歌手来源\", \"签约日期\", \"MPPM潮流人等级\", \"结算播放量\", \"运营播放量\", \"历史累计结算播放量\", \"有播放量歌曲数\", \"历史累计运营播放量\", \"付费用户结算播放量\", \"结算播放量占比\", \"运营播放份额\", \"免费用户结算播放占比\", \"完播量\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌手名 from 艺人库 where datediff('year', 签约日期, '2023-09-11') <= 0.5 order by 结算播放量 desc limit 10",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )\", we are asked:\n\"歌手有哪些 \" so we need column = [歌手名]\n\"签约的播放量\" so we need column = [结算播放量]\n\"签约的播放量\" so we need column = [签约日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10,0.5,year]. So the Schema_links are:\nSchema_links: [歌手名,结算播放量,签约日期,10,0.5,year]",
|
||||
"generatedSchemaLinkings": "[歌手名,结算播放量,签约日期,10,0.5,year]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"我想要近半年签约的播放量前十的歌手有哪些 (补充信息:。当前的日期是2023-09-11) (备注: )\", we are asked:\n\"签约的播放量\" so we need column = [签约日期]\n\"歌手有哪些 \" so we need column = [歌手名]\n\"签约的播放量\" so we need column = [结算播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10,0.5,year]. So the Schema_links are:\nSchema_links: [签约日期,歌手名,结算播放量,10,0.5,year]",
|
||||
"generatedSchemaLinkings": "[签约日期,歌手名,结算播放量,10,0.5,year]"
|
||||
},
|
||||
{
|
||||
"question": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"questionAugmented": "最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 1 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"最近一年发行\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,1,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,1,7,year,day]",
|
||||
"generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,1,7,year,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"最近一年发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"天播放超过一\" so we need column = [结算播放量]\n\"最近一年发行\" so we need column = [发行日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,1,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,发行日期,10000000,1,7,year,day]",
|
||||
"generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,发行日期,10000000,1,7,year,day]"
|
||||
},
|
||||
{
|
||||
"question": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"questionAugmented": "今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where datediff('year', 发行日期, '2023-08-12') <= 0 and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,0,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,0,7,year,day]",
|
||||
"generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,0,7,year,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,0,7,year,day]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,发行日期,10000000,0,7,year,day]",
|
||||
"generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,发行日期,10000000,0,7,year,day]"
|
||||
},
|
||||
{
|
||||
"question": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的",
|
||||
"questionAugmented": "2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"发行日期\", \"歌曲语言\", \"歌曲来源\", \"歌曲流派\", \"歌曲名\", \"歌曲版本\", \"歌曲类型\", \"发行时间\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 发行日期 >= '2023-01-01' and datediff('day', 数据日期, '2023-08-12') <= 7 and 结算播放量 > 10000000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,2023-01-01,7,day]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,发行日期,数据日期,10000000,2023-01-01,7,day]",
|
||||
"generatedSchemaLinkings": "[歌曲名,结算播放量,发行日期,数据日期,10000000,2023-01-01,7,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年以来发行的歌曲中,有哪些在近7天播放超过一千万的 (补充信息:’'514129144'‘是一个’MPPM歌曲ID‘。当前的日期是2023-08-12) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"天播放超过一\" so we need column = [结算播放量]\n\"年以来发行的\" so we need column = [发行日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10000000,2023-01-01,7,day]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,发行日期,10000000,2023-01-01,7,day]",
|
||||
"generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,发行日期,10000000,2023-01-01,7,day]"
|
||||
},
|
||||
{
|
||||
"question": "周洁轮2023年6月之后发布的歌曲有哪些",
|
||||
"questionAugmented": "周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 发布时间 >= '2023-06-01' and 歌手名 = '周洁轮'",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"周洁轮2023年6月之后发布的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2023-06-01,周洁轮]. So the Schema_links are:\nSchema_links: [歌曲名,歌手名,发布时间,2023-06-01,周洁轮]",
|
||||
@@ -154,6 +173,7 @@
|
||||
{
|
||||
"question": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的?",
|
||||
"questionAugmented": "邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 发布时间 >= '2023-01-05' and 歌手名 = '邓梓琦' and 结算播放量 > 5000000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"邓梓琦在2023年1月5日之后发布的歌曲中,有哪些播放量大于500W的? (补充信息:’'2312311'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-01) (备注: )\", we are asked:\n\"的歌曲中,有\" so we need column = [歌曲名]\n\"中,有哪些播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [5000000,2023-01-05,邓梓琦]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,5000000,2023-01-05,邓梓琦]",
|
||||
@@ -162,14 +182,16 @@
|
||||
{
|
||||
"question": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些?",
|
||||
"questionAugmented": "2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 数据日期 >= '2023-06-01' and 歌手名 = '张亮英' and 结算播放量 > 2000000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )\", we are asked:\n\"的歌曲有哪些? (\" so we need column = [歌曲名]\n\"后,张亮英播放量大\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2000000,2023-06-01,张亮英]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,数据日期,2000000,2023-06-01,张亮英]",
|
||||
"generatedSchemaLinkings": "[歌曲名,结算播放量,歌手名,数据日期,2000000,2023-06-01,张亮英]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2023年6月以后,张亮英播放量大于200万的歌曲有哪些? (补充信息:’'45453'‘是一个’MPPM歌手ID‘。当前的日期是2023-09-17) (备注: )\", we are asked:\n\"的歌曲有哪些? (\" so we need column = [歌曲名]\n\"当前的日期是\" so we need column = [数据日期]\n\"后,张亮英播放量大\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [2000000,2023-06-01,张亮英]. So the Schema_links are:\nSchema_links: [歌曲名,数据日期,结算播放量,歌手名,2000000,2023-06-01,张亮英]",
|
||||
"generatedSchemaLinkings": "[歌曲名,数据日期,结算播放量,歌手名,2000000,2023-06-01,张亮英]"
|
||||
},
|
||||
{
|
||||
"question": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些",
|
||||
"questionAugmented": "2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 发布时间 >= '2021-06-01' and 歌手名 = '李雨纯' and 结算播放量 > 200000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2021年6月以后发布的李雨纯的播放量大于20万的歌曲有哪些 (补充信息:’'23109'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"的播放量大于\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"月以后发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,2021-06-01,李雨纯]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,2021-06-01,李雨纯]",
|
||||
@@ -178,6 +200,7 @@
|
||||
{
|
||||
"question": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些",
|
||||
"questionAugmented": "刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲名\", \"歌曲版本\", \"歌手名\", \"歌曲类型\", \"发布时间\", \"MPPM歌曲ID\", \"是否严选窄口径歌曲\", \"是否严选宽口径歌曲\", \"是否潮流人歌曲\", \"超声波歌曲ID\", \"C音歌曲ID\", \"C音歌曲MID\", \"结算播放量\", \"运营播放量\", \"分享量\", \"收藏量\", \"运营搜播量\", \"结算搜播量\", \"拉新用户数\", \"拉活用户数\", \"分享率\", \"结算播放份额\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 歌曲名 from 歌曲库 where 发布时间 >= '1992-04-02' and 发布时间 <= '2020-05-02' and 歌手名 = '刘锝桦' and 结算播放量 > 200000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"刘锝桦在1992年4月2日到2020年5月2日之间发布的播放量大于20万的歌曲有哪些 (补充信息:’'4234234'‘是一个’MPPM歌手ID‘。当前的日期是2023-08-16) (备注: )\", we are asked:\n\"歌曲有哪些 \" so we need column = [歌曲名]\n\"发布的播放量\" so we need column = [结算播放量]\n\"歌手ID‘。\" so we need column = [歌手名]\n\"日之间发布的\" so we need column = [发布时间]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [200000,刘锝桦,1992-04-02,2020-05-02]. So the Schema_links are:\nSchema_links: [歌曲名,结算播放量,歌手名,发布时间,200000,刘锝桦,1992-04-02,2020-05-02]",
|
||||
@@ -186,65 +209,73 @@
|
||||
{
|
||||
"question": "内容库近30天访问次数的平均数",
|
||||
"questionAugmented": "内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select avg(访问次数) from 内容库产品 where datediff('day', 数据日期, '2023-09-04') <= 30 ",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"访问次数的平均数\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,30,day]",
|
||||
"generatedSchemaLinkings": "[访问次数,数据日期,30,day]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近30天访问次数的平均数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数的平均数\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [30,day]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,30,day]",
|
||||
"generatedSchemaLinkings": "[数据日期,访问次数,30,day]"
|
||||
},
|
||||
{
|
||||
"question": "内容库近半年哪个月的访问次数汇总最高",
|
||||
"questionAugmented": "内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select MONTH(数据日期), sum(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期) order by sum(访问次数) desc limit 1",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"的访问次数汇总\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,1,0.5,year]",
|
||||
"generatedSchemaLinkings": "[访问次数,数据日期,1,0.5,year]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年哪个月的访问次数汇总最高 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"的访问次数汇总\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,1,0.5,year]",
|
||||
"generatedSchemaLinkings": "[数据日期,访问次数,1,0.5,year]"
|
||||
},
|
||||
{
|
||||
"question": "内容库近半年每个月的平均访问次数",
|
||||
"questionAugmented": "内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select MONTH(数据日期), avg(访问次数) from 内容库产品 where datediff('year', 数据日期, '2023-09-04') <= 0.5 group by MONTH(数据日期)",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"访问次数 (\" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,0.5,year]",
|
||||
"generatedSchemaLinkings": "[访问次数,数据日期,0.5,year]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库近半年每个月的平均访问次数 (补充信息:。当前的日期是2023-09-04) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问次数 (\" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [0.5,year]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,0.5,year]",
|
||||
"generatedSchemaLinkings": "[数据日期,访问次数,0.5,year]"
|
||||
},
|
||||
{
|
||||
"question": "内容库 按部门统计访问次数 top10 的部门",
|
||||
"questionAugmented": "内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 部门, sum(访问次数) from 内容库产品 group by 部门 order by sum(访问次数) desc limit 10",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"计访问次数 \" so we need column = [访问次数]\n\" 的部门 (\" so we need column = [部门]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [访问次数,部门,10]",
|
||||
"generatedSchemaLinkings": "[访问次数,部门,10]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"内容库 按部门统计访问次数 top10 的部门 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\" 的部门 (\" so we need column = [部门]\n\"计访问次数 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [10]. So the Schema_links are:\nSchema_links: [部门,访问次数,10]",
|
||||
"generatedSchemaLinkings": "[部门,访问次数,10]"
|
||||
},
|
||||
{
|
||||
"question": "超音速 近7个月,月度总访问量超过 2万的月份",
|
||||
"questionAugmented": "超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )",
|
||||
"modelName": "内容库产品",
|
||||
"dbSchema": "Table: 内容库产品, Columns = [\"用户名\", \"部门\", \"模块\", \"访问时长\", \"访问次数\", \"访问人数\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select MONTH(数据日期) from 内容库产品 where datediff('month', 数据日期, '2023-09-10') <= 7 group by MONTH(数据日期) having sum(访问次数) > 20000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"访问量超过 \" so we need column = [访问次数]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [访问次数,数据日期,7,20000,month]",
|
||||
"generatedSchemaLinkings": "[访问次数,数据日期,7,20000,month]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"超音速 近7个月,月度总访问量超过 2万的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"当前的日期是\" so we need column = [数据日期]\n\"访问量超过 \" so we need column = [访问次数]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [7,20000,month]. So the Schema_links are:\nSchema_links: [数据日期,访问次数,7,20000,month]",
|
||||
"generatedSchemaLinkings": "[数据日期,访问次数,7,20000,month]"
|
||||
},
|
||||
{
|
||||
"question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量",
|
||||
"questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期)",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"月之间发布到\" so we need column = [发布日期]\n\"运营播放量 \" so we need column = [播放量]\n\"运营播放量 \" so we need column = [运营播放量]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [发布日期,播放量,运营播放量,数据日期,1,year,100,2022-07-01,2023-07-01]",
|
||||
"generatedSchemaLinkings": "[发布日期,播放量,运营播放量,数据日期,1,year,100,2022-07-01,2023-07-01]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top 100,再按月粒度来统计近1年的运营播放量 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"运营播放量 \" so we need column = [播放量]\n\"当前的日期是\" so we need column = [数据日期]\n\"月之间发布到\" so we need column = [发布日期]\n\"运营播放量 \" so we need column = [运营播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [播放量,数据日期,发布日期,运营播放量,1,year,100,2022-07-01,2023-07-01]",
|
||||
"generatedSchemaLinkings": "[播放量,数据日期,发布日期,运营播放量,1,year,100,2022-07-01,2023-07-01]"
|
||||
},
|
||||
{
|
||||
"question": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份",
|
||||
"questionAugmented": "2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )",
|
||||
"modelName": "歌曲库",
|
||||
"dbSchema": "Table: 歌曲库, Columns = [\"歌曲语言\", \"歌曲来源\", \"运营播放量\", \"播放量\", \"歌曲名\", \"结算播放量\", \"专辑名\", \"发布日期\", \"歌曲版本\", \"歌曲类型\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select MONTH(数据日期), sum(运营播放量) from (select 数据日期, 运营播放量 from 歌曲库 where 发布日期 >= '2022-07-01' and 发布日期 <= '2023-07-01' order by 播放量 desc limit 100) t where datediff('year', 数据日期, '2023-09-10') <= 1 group by MONTH(数据日期) having sum(运营播放量) > 2000",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"月之间发布到\" so we need column = [发布日期]\n\"播放量之和,\" so we need column = [播放量]\n\"运营播放量之\" so we need column = [运营播放量]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2000,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [发布日期,播放量,运营播放量,数据日期,1,2000,year,100,2022-07-01,2023-07-01]",
|
||||
"generatedSchemaLinkings": "[发布日期,播放量,运营播放量,数据日期,1,2000,year,100,2022-07-01,2023-07-01]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"2022年7月到2023年7月之间发布到歌曲,按播放量取top100,再按月粒度来统计近1年的运营播放量之和,筛选出其中运营播放量之和大于2k的月份 (补充信息:。当前的日期是2023-09-10) (备注: )\", we are asked:\n\"播放量之和,\" so we need column = [播放量]\n\"当前的日期是\" so we need column = [数据日期]\n\"月之间发布到\" so we need column = [发布日期]\n\"运营播放量之\" so we need column = [运营播放量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2000,year,100,2022-07-01,2023-07-01]. So the Schema_links are:\nSchema_links: [播放量,数据日期,发布日期,运营播放量,1,2000,year,100,2022-07-01,2023-07-01]",
|
||||
"generatedSchemaLinkings": "[播放量,数据日期,发布日期,运营播放量,1,2000,year,100,2022-07-01,2023-07-01]"
|
||||
},
|
||||
{
|
||||
"question": "今年智能机在哪个国家的销量之和最高",
|
||||
"questionAugmented": "今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )",
|
||||
"modelName": "营销月模型",
|
||||
"dbSchema": "Table: 营销月模型, Columns = [\"国家中文名\", \"机型类别\", \"销量\", \"数据日期\"]\nForeign_keys: []",
|
||||
"sql": "select 国家中文名, sum(销量) from 营销月模型 where 机型类别 = '智能机' and 数据日期 >= '2023-01-01' and 数据日期 <= '2023-11-01' group by 国家中文名 order by sum(销量) desc limit 1",
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )\", we are asked:\n\"国家的销量之和\" so we need column = [国家中文名]\n\"个国家的销量\" so we need column = [销量]\n\"’机型类别‘\" so we need column = [机型类别]\n\"当前的日期是\" so we need column = [数据日期]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2023-11-01,智能机,2023-01-01]. So the Schema_links are:\nSchema_links: [国家中文名,销量,机型类别,数据日期,1,2023-11-01,智能机,2023-01-01]",
|
||||
"generatedSchemaLinkings": "[国家中文名,销量,机型类别,数据日期,1,2023-11-01,智能机,2023-01-01]"
|
||||
"generatedSchemaLinkingCoT": "Let’s think step by step. In the question \"今年智能机在哪个国家的销量之和最高 (补充信息:’'智能机'‘是一个’机型类别‘。当前的日期是2023-11-01) (备注: )\", we are asked:\n\"’机型类别‘\" so we need column = [机型类别]\n\"当前的日期是\" so we need column = [数据日期]\n\"国家的销量之和\" so we need column = [国家中文名]\n\"个国家的销量\" so we need column = [销量]\nBased on the tables, columns, and Foreign_keys, The set of possible cell values are = [1,2023-11-01,智能机,2023-01-01]. So the Schema_links are:\nSchema_links: [机型类别,数据日期,国家中文名,销量,1,2023-11-01,智能机,2023-01-01]",
|
||||
"generatedSchemaLinkings": "[机型类别,数据日期,国家中文名,销量,1,2023-11-01,智能机,2023-01-01]"
|
||||
}
|
||||
]
|
||||
@@ -1,6 +1,14 @@
|
||||
# -*- coding:utf-8 -*-
|
||||
|
||||
from loguru import logger
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from config.config_parse import LOG_FILE_PATH
|
||||
|
||||
logger.remove() #remove the old handler. Else, the old one will work along with the new one you've added below'
|
||||
logger.add(sys.stdout, format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", level="INFO")
|
||||
|
||||
logger.add(LOG_FILE_PATH, rotation="500 MB", retention="7 days", format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", level="INFO")
|
||||
|
||||
@@ -70,6 +70,7 @@ def transform_sql_example_autoCoT_run(examplar_list, min_window_size, max_window
|
||||
transformed_sql_examplar = dict()
|
||||
transformed_sql_examplar['question'] = question
|
||||
transformed_sql_examplar['questionAugmented'] = question_augmented
|
||||
transformed_sql_examplar['modelName'] = table_name
|
||||
transformed_sql_examplar['dbSchema'] = db_schema
|
||||
transformed_sql_examplar['sql'] = sql
|
||||
transformed_sql_examplar['generatedSchemaLinkingCoT'] = generated_schema_linking_cot
|
||||
|
||||
@@ -33,7 +33,26 @@ class Text2DSLAgentBase(object):
|
||||
def get_examples_candidates(self, question: str, filter_condition: Mapping[str, str], num_examples: int)->List[Mapping[str, str]]:
|
||||
few_shot_example_meta_list = self.sql_example_prompter.retrieve_few_shot_example(question, num_examples, filter_condition)
|
||||
|
||||
return few_shot_example_meta_list
|
||||
if len(few_shot_example_meta_list) == num_examples:
|
||||
return few_shot_example_meta_list
|
||||
elif len(few_shot_example_meta_list) < num_examples:
|
||||
logger.info(f"few_shot_example_meta_list size: {len(few_shot_example_meta_list)} < num_examples: {num_examples}")
|
||||
existed_id_set = set([item['id'] for item in few_shot_example_meta_list])
|
||||
extra_few_shot_example_meta_list = self.sql_example_prompter.retrieve_few_shot_example(query_text=question, retrieval_num=num_examples, filter_condition=None)
|
||||
|
||||
for item in extra_few_shot_example_meta_list:
|
||||
if item['id'] not in existed_id_set:
|
||||
few_shot_example_meta_list.append(item)
|
||||
existed_id_set.add(item['id'])
|
||||
if len(few_shot_example_meta_list) == num_examples:
|
||||
break
|
||||
|
||||
logger.info(f"few_shot_example_meta_list size: {len(few_shot_example_meta_list)} = num_examples: {num_examples}")
|
||||
return few_shot_example_meta_list
|
||||
else:
|
||||
logger.info(f"few_shot_example_meta_list size: {len(few_shot_example_meta_list)} > num_examples: {num_examples}")
|
||||
few_shot_example_meta_list = few_shot_example_meta_list[:num_examples]
|
||||
return few_shot_example_meta_list
|
||||
|
||||
def get_fewshot_example_combos(self, example_meta_list:List[Mapping[str, str]], num_fewshots:int)-> List[List[Mapping[str, str]]]:
|
||||
fewshot_example_list = []
|
||||
@@ -124,6 +143,7 @@ class Text2DSLAgentAutoCoT(Text2DSLAgentBase):
|
||||
|
||||
schema_linking_prompt = instruction + '\n\n' + schema_linking_fewshot_prompt + '\n\n' + new_case_prompt
|
||||
|
||||
logger.info(f'schema_linking_prompt: {schema_linking_prompt}')
|
||||
return schema_linking_prompt
|
||||
|
||||
|
||||
@@ -154,6 +174,7 @@ class Text2DSLAgentAutoCoT(Text2DSLAgentBase):
|
||||
|
||||
sql_example_prompt = instruction + '\n\n' + sql_example_fewshot_prompt + '\n\n' + new_case_prompt
|
||||
|
||||
logger.info(f'sql_example_prompt: {sql_example_prompt}')
|
||||
return sql_example_prompt
|
||||
|
||||
def generate_sql_prompt_pool(self, question: str, domain_name: str,fields_list: List[str],
|
||||
@@ -183,6 +204,7 @@ class Text2DSLAgentAutoCoT(Text2DSLAgentBase):
|
||||
|
||||
prompt = instruction + '\n\n' + fewshot_prompt + '\n\n' + new_case_prompt
|
||||
|
||||
logger.info(f'schema_linking_sql_prompt: {prompt}')
|
||||
return prompt
|
||||
|
||||
def generate_schema_linking_sql_prompt_pool(self, question: str, current_date:str, domain_name: str, fields_list: List[str],
|
||||
@@ -223,6 +245,7 @@ class Text2DSLAgentAutoCoT(Text2DSLAgentBase):
|
||||
resp['model'] = model_name
|
||||
resp['fields'] = fields_list
|
||||
resp['priorSchemaLinking'] = prior_schema_links
|
||||
resp['priorExts'] = prior_exts
|
||||
resp['currentDate'] = current_date
|
||||
|
||||
resp['schemaLinkingOutput'] = schema_link_output
|
||||
@@ -259,6 +282,7 @@ class Text2DSLAgentAutoCoT(Text2DSLAgentBase):
|
||||
resp['model'] = model_name
|
||||
resp['fields'] = fields_list
|
||||
resp['priorSchemaLinking'] = prior_schema_links
|
||||
resp['priorExts'] = prior_exts
|
||||
resp['currentDate'] = current_date
|
||||
|
||||
resp['schemaLinkingComboOutput'] = schema_linking_sql_shortcut_output
|
||||
@@ -330,6 +354,7 @@ class Text2DSLAgentAutoCoT(Text2DSLAgentBase):
|
||||
resp['model'] = model_name
|
||||
resp['fields'] = fields_list
|
||||
resp['priorSchemaLinking'] = prior_schema_links
|
||||
resp['priorExts'] = prior_exts
|
||||
resp['currentDate'] = current_date
|
||||
|
||||
resp['schemaLinkStr'] = schema_linking_output_max
|
||||
@@ -372,6 +397,7 @@ class Text2DSLAgentAutoCoT(Text2DSLAgentBase):
|
||||
resp['model'] = model_name
|
||||
resp['fields'] = fields_list
|
||||
resp['priorSchemaLinking'] = prior_schema_links
|
||||
resp['priorExts'] = prior_exts
|
||||
resp['currentDate'] = current_date
|
||||
|
||||
resp['schemaLinkStr'] = schema_linking_output_max
|
||||
|
||||
Reference in New Issue
Block a user