Browse Source

api newtask ready for test

master
filesite 9 months ago
parent
commit
99ab2835e6
  1. 24
      README.md
  2. 17
      common.mjs
  3. 52
      router_api.mjs
  4. 19
      test/common.test.mjs

24
README.md

@ -29,6 +29,25 @@ Hero Union主要做两件事: @@ -29,6 +29,25 @@ Hero Union主要做两件事:
Hero Union联盟网站:[Hero Union英雄联盟](https://herounion.filesite.io/)。
接口返回值示例及其说明:
执行成功:
```
{
"code": 1,
"message": "完成",
其它数据...
}
```
执行失败:
```
{
"code": 0,
"message": "错误信息"
}
```
以下为联盟所有接口的详细文档:
### 爬虫任务领取接口
@ -80,9 +99,12 @@ sign @@ -80,9 +99,12 @@ sign
参数说明:
* platform: url所属平台,目前支持的:抖音、快手、西瓜视频、bilibili
* contract: 数据抓取合约,目前支持的:tajiantv,可由爬虫自定义并自己实现合约规则
* contract: 数据抓取合约,目前支持的:tajiantv,可由爬虫自定义并实现合约规则
* data_mode: 返回数据格式,默认:json,可选值:json、html
返回值:
如果提交完成,会返回新任务数据task。
### 查询网页抓取任务结果接口

17
common.mjs

@ -135,6 +135,23 @@ class Common { @@ -135,6 +135,23 @@ class Common {
return /^\S{6,50}$/i.test(contact);
}
//检查url是否符合要求
isUrlOk(url) {
return /^http(s)?:\/\/[\w\.]{6,100}$/i.test(url);
}
//检查uuid是否符合要求:6-32位的英文字符串
isUuidOk(uuid) {
return /^\w{6,32}$/i.test(uuid);
}
//检查英文名等参数是否符合标准:5 - 32位字母和下划线的组合
isNormalName(name, minLength, maxLength) {
if (typeof(minLength) == 'undefined') {minLength = 6;}
if (typeof(maxLength) == 'undefined') {maxLength = 32;}
return /^\w+$/i.test(name) && name.length >= minLength && name.length <= maxLength;
}
getLogArguments() {
let args = [];
let localTime = this.getLocalTimeString('zh-Hans-CN', 'Asia/Shanghai');

52
router_api.mjs

@ -19,6 +19,8 @@ router.get('/', async (req, res) => { @@ -19,6 +19,8 @@ router.get('/', async (req, res) => {
"/api/onboard/": "爬虫状态上报到联盟",
"/api/stats/": "查看联盟状态",
"/api/newtask/": "向联盟提交新的爬虫任务",
};
const data = {
@ -34,7 +36,7 @@ router.get('/', async (req, res) => { @@ -34,7 +36,7 @@ router.get('/', async (req, res) => {
* uuid: 用户ID
* url: 目标网址
* platform: 目标网址所属平台可选值[douyin, kuaishou, xigua, bilibili]
* contract: '', 需要抓取的数据合约凡是支持此合约的爬虫将根据合约内容抓取数据具体参考爬虫所支持的合约
* contract: 需要抓取的数据合约,凡是支持此合约的爬虫将根据合约内容抓取数据,具体参考爬虫所支持的合约
* data_mode: 返回数据格式可选值[json, html]
* country: 国家代码
* lang: 语言代码
@ -45,15 +47,61 @@ router.post('/newtask/', async (req, res) => { @@ -45,15 +47,61 @@ router.post('/newtask/', async (req, res) => {
let uuid = req.body.uuid,
url = req.body.url,
platform = req.body.platform,
contract = req.body.contract,
data_mode = req.body.data_mode,
country = req.body.country,
lang = req.body.lang,
notify_url = req.body.notify_url;
notify_url = req.body.notify_url,
sign = req.body.sign;
let data = {code: 0, message: ''};
//参数格式检查
if (!uuid || !url || !platform || !contract || !sign) {
data.message = '必选参数uuid、url、platform、contract、sign不能为空';
}else if (common.isUuidOk(uuid) == false) {
data.message = '参数uuid应为6-32位的英文字符串,请联系管理员获得';
}else if (common.isUrlOk(url) == false) {
data.message = '参数url必须是一个网址';
}else if (common.isNormalName(platform, 5) == false) {
data.message = '平台名platform应为5-32位的英文字符串';
}else if (common.isNormalName(contract, 5) == false) {
data.message = '合约contract应为5-32位的英文字符串';
}else if (data_mode && data_mode != 'json' && data_mode != 'html') {
data.message = '数据格式data_mode可选值:json, html';
}else if (country && common.isIosCountryCode(country) == false) {
data.message = '国家代码country请传小写的两位字母,参考两位ISO CODES:https://countrycode.org/';
}else if (lang && common.isIosLangCode(lang) == false) {
data.message = '语言代码lang请传小写的两位字母,参考ISO 639-1 Code:https://www.loc.gov/standards/iso639-2/php/code_list.php';
}else if (notify_url && common.isUrlOk(notify_url) == false) {
data.message = '参数notify_url必须是一个网址';
}else if (common.isNormalName(sign, 32, 32) == false) {
data.message = '签名sign应为32位的英文字符串';
}
//签名检查
let userToken = await heroUnion.getUserToken(uuid);
if (!userToken) {
data.message = `用户 ${uuid} 不存在,请检查参数uuid并确认大小写完整正确`;
}else {
let paramsCheck = {};
for (const key in req.body) {
if (key != 'sign') {
paramsCheck[key] = req.body[key];
}
}
let mySign = common.sign(paramsCheck, userToken);
if (mySign.toLowerCase() != sign.toLowerCase()) {
data.message = `签名 ${sign} 不匹配,请确保token正确及签名方法跟文档一致`;
}
}
if (!data.message) {
data.task = heroUnion.createTask(uuid, url, platform, contract, data_mode, notify_url, country, lang);
data.code = 1;
data.message = '新爬虫任务提交完成';
}
return res.status(200).json(data);
});

19
test/common.test.mjs

@ -5,7 +5,7 @@ @@ -5,7 +5,7 @@
import test from 'node:test';
import assert from 'node:assert';
import common from '../common.mjs';
import md5 from 'md5';
test('Common function sortDict test', (t) => {
let params = {
@ -81,3 +81,20 @@ test('Common function log/info/warn/error test', async (t) => { @@ -81,3 +81,20 @@ test('Common function log/info/warn/error test', async (t) => {
console.log("插入日期后的参数:\n%s", args);
});
// Table-driven checks for common.isNormalName(name, minLength, maxLength):
// each entry lists the call arguments and the boolean the call must return.
test('Common function isNormalName test', async (t) => {
    const cases = [
        // 6 characters, minimum 5 (default maximum) -> accepted
        {args: ['test01', 5], expected: true},
        // 6 characters, minimum 8 -> too short
        {args: ['test01', 8], expected: false},
        // 11 characters, allowed range 6..10 -> too long
        {args: ['test0123456', 6, 10], expected: false},
        // 11 characters, allowed range 6..15 -> accepted
        {args: ['test0123456', 6, 15], expected: true},
        // md5 output checked against an exact length of 32
        {args: [md5('test0123456'), 32, 32], expected: true},
    ];

    for (const {args, expected} of cases) {
        const actual = common.isNormalName(...args);
        assert.equal(actual, expected);
    }
});

Loading…
Cancel
Save