From 99ab2835e68ed08d9a7577ccbf2248577f3870e1 Mon Sep 17 00:00:00 2001 From: filesite Date: Tue, 9 Apr 2024 23:39:17 +0800 Subject: [PATCH] api newtask ready for test --- README.md | 24 +++++++++++++++++++- common.mjs | 17 +++++++++++++++ router_api.mjs | 52 ++++++++++++++++++++++++++++++++++++++++++-- test/common.test.mjs | 19 +++++++++++++++- 4 files changed, 108 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 49bb4c1..51b2642 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,25 @@ Hero Union主要做两件事: Hero Union联盟网站:[Hero Union英雄联盟](https://herounion.filesite.io/)。 +接口返回值示例及其说明: + +执行成功: +``` +{ + "code": 1, + "message": "完成", + 其它数据... +} +``` + +执行失败: +{ + "code": 0, + "message": "错误信息" +} +``` + + 以下为联盟所有接口的详细文档: ### 爬虫任务领取接口 @@ -80,9 +99,12 @@ sign 参数说明: * platform: url所属平台,目前支持的:抖音、快手、西瓜视频、bilibili -* contract: 数据抓取合约,目前支持的:tajiantv,可由爬虫自己定义并自己实现合约规则 +* contract: 数据抓取合约,目前支持的:tajiantv,可由爬虫自定义并实现合约规则 * data_mode: 返回数据格式,默认:json,可选值:json、html +返回值: +如果提交完成,会返回新任务数据task。 + ### 查询网页抓取任务结果接口 diff --git a/common.mjs b/common.mjs index 011bdf7..d3eacfc 100644 --- a/common.mjs +++ b/common.mjs @@ -135,6 +135,23 @@ class Common { return /^\S{6,50}$/i.test(contact); } + //检查url是否符合要求 + isUrlOk(url) { + return /^http(s)?:\/\/[\w\.]{6,100}$/i.test(url); + } + + //检查uuid是否符合要求:6-32位的英文字符串 + isUuidOk(uuid) { + return /^\w{6,32}$/i.test(uuid); + } + + //检查英文名等参数是否符合标准:5 - 32位字母和下划线的组合 + isNormalName(name, minLength, maxLength) { + if (typeof(minLength) == 'undefined') {minLength = 6;} + if (typeof(maxLength) == 'undefined') {maxLength = 32;} + return /^\w+$/i.test(name) && name.length >= minLength && name.length <= maxLength; + } + getLogArguments() { let args = []; let localTime = this.getLocalTimeString('zh-Hans-CN', 'Asia/Shanghai'); diff --git a/router_api.mjs b/router_api.mjs index ca823d3..c921eaa 100644 --- a/router_api.mjs +++ b/router_api.mjs @@ -19,6 +19,8 @@ router.get('/', async (req, res) => { "/api/onboard/": "爬虫状态上报到联盟", "/api/stats/": 
router.post('/newtask/', async (req, res) => {
    // Handles POST /newtask/: validates the request body, verifies the caller's
    // signature and, when both pass, creates a new crawl task.
    // Always answers HTTP 200 with {code, message[, task]}; code 1 means success,
    // code 0 means failure and `message` carries the reason.
    const {
        uuid,
        url,
        platform,
        contract,
        data_mode,
        country,
        lang,
        notify_url,
        sign,
    } = req.body;

    let data = {code: 0, message: ''};

    // Parameter format checks: the first failing rule decides the error message.
    if (!uuid || !url || !platform || !contract || !sign) {
        data.message = '必选参数uuid、url、platform、contract、sign不能为空';
    }else if (common.isUuidOk(uuid) == false) {
        data.message = '参数uuid应为6-32位的英文字符串,请联系管理员获得';
    }else if (common.isUrlOk(url) == false) {
        data.message = '参数url必须是一个网址';
    }else if (common.isNormalName(platform, 5) == false) {
        data.message = '平台名platform应为5-32位的英文字符串';
    }else if (common.isNormalName(contract, 5) == false) {
        data.message = '合约contract应为5-32位的英文字符串';
    }else if (data_mode && data_mode != 'json' && data_mode != 'html') {
        data.message = '数据格式data_mode可选值:json, html';
    }else if (country && common.isIosCountryCode(country) == false) {
        data.message = '国家代码country请传小写的两位字母,参考两位ISO CODES:https://countrycode.org/';
    }else if (lang && common.isIosLangCode(lang) == false) {
        data.message = '语言代码lang请传小写的两位字母,参考ISO 639-1 Code:https://www.loc.gov/standards/iso639-2/php/code_list.php';
    }else if (notify_url && common.isUrlOk(notify_url) == false) {
        data.message = '参数notify_url必须是一个网址';
    }else if (common.isNormalName(sign, 32, 32) == false) {
        data.message = '签名sign应为32位的英文字符串';
    }

    // Stop here when validation failed. Previously execution fell through to
    // the signature check, which replaced the validation message and called
    // sign.toLowerCase() on a possibly missing sign (TypeError inside an async
    // handler, i.e. an unhandled rejection).
    if (data.message) {
        return res.status(200).json(data);
    }

    // Signature check: recompute the signature over every body field except `sign`.
    const userToken = await heroUnion.getUserToken(uuid);
    if (!userToken) {
        data.message = `用户 ${uuid} 不存在,请检查参数uuid并确认大小写完整正确`;
        return res.status(200).json(data);
    }

    const paramsCheck = {};
    for (const key in req.body) {
        if (key != 'sign') {
            paramsCheck[key] = req.body[key];
        }
    }

    const mySign = common.sign(paramsCheck, userToken);
    if (mySign.toLowerCase() != sign.toLowerCase()) {
        data.message = `签名 ${sign} 不匹配,请确保token正确及签名方法跟文档一致`;
        return res.status(200).json(data);
    }

    // NOTE(review): createTask is not awaited, as in the original; confirm it is synchronous.
    data.task = heroUnion.createTask(uuid, url, platform, contract, data_mode, notify_url, country, lang);
    data.code = 1;
    data.message = '新爬虫任务提交完成';

    return res.status(200).json(data);
});