|
|
|
@ -29,9 +29,33 @@ router.get('/', async (req, res) => {
@@ -29,9 +29,33 @@ router.get('/', async (req, res) => {
|
|
|
|
|
return res.status(200).json(data); |
|
|
|
|
}); |
|
|
|
|
|
|
|
|
|
/**
 * Handler for POST /newtask/.
 *
 * Request body parameters:
 *   uuid:       user ID
 *   url:        target URL
 *   platform:   platform the target URL belongs to; allowed values:
 *               [douyin, kuaishou, xigua, bilibili]
 *   contract:   data contract to scrape; any crawler that supports this
 *               contract scrapes data according to it (see the contracts
 *               supported by each crawler)
 *   data_mode:  format of the returned data; allowed values: [json, html]
 *   country:    country code
 *   lang:       language code
 *   notify_url: callback URL used for notifications
 *   sign:       parameter signature; see the "API parameter signing method"
 *               section of README.md
 *
 * Current behavior: reads the fields listed in the destructuring below and
 * replies with HTTP 200 and the JSON body {code: 0, message: ''}.
 *
 * NOTE(review): `contract` and `sign` are documented above but are not read
 * by this handler yet — confirm whether they should be parsed/verified here.
 **/
router.post('/newtask/', async (req, res) => {
    // Fields are extracted up front for the validation step below; they are
    // not consumed anywhere else in this handler yet.
    const {
        uuid,
        url,
        platform,
        data_mode,
        country,
        lang,
        notify_url
    } = req.body;

    // The leftover stub `return res.send('api/newtask/');` used to sit here;
    // it made everything below unreachable and returned plain text instead
    // of the JSON envelope, so it has been removed.
    const data = {code: 0, message: ''};

    // TODO: parameter format validation (no checks are implemented yet).

    return res.status(200).json(data);
});
|
|
|
|
|
|
|
|
|
router.get('/gettask/', async (req, res) => { |
|
|
|
@ -54,17 +78,23 @@ router.get('/querytask/', async (req, res) => {
@@ -54,17 +78,23 @@ router.get('/querytask/', async (req, res) => {
|
|
|
|
|
* name |
|
|
|
|
* description |
|
|
|
|
* status: [idle, busy] |
|
|
|
|
* platforms: '', //支持的平台,可由爬虫定义
|
|
|
|
|
* contracts: '', //支持的数据抓取合约,具体内容由爬虫定义
|
|
|
|
|
* timestamp |
|
|
|
|
* country |
|
|
|
|
* lang |
|
|
|
|
* contact //爬虫提供方的联系方式
|
|
|
|
|
*/ |
|
|
|
|
router.post('/onboard/', async (req, res) => { |
|
|
|
|
let bot_name = req.body.name, |
|
|
|
|
bot_desc = req.body.description, |
|
|
|
|
status = req.body.status, |
|
|
|
|
platforms = req.body.platforms, //多个则用英文逗号间隔
|
|
|
|
|
contracts = req.body.contracts, //多个则用英文逗号间隔
|
|
|
|
|
timestamp = req.body.timestamp, |
|
|
|
|
country = req.body.country, |
|
|
|
|
lang = req.body.lang; |
|
|
|
|
lang = req.body.lang, |
|
|
|
|
contact = req.body.contact; |
|
|
|
|
|
|
|
|
|
let data = { |
|
|
|
|
"code": 0, |
|
|
|
@ -72,8 +102,8 @@ router.post('/onboard/', async (req, res) => {
@@ -72,8 +102,8 @@ router.post('/onboard/', async (req, res) => {
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
//参数格式检查
|
|
|
|
|
if (!bot_name || !bot_desc || !status || !timestamp) { |
|
|
|
|
data.message = '必填参数name、description、status、timestamp不能为空'; |
|
|
|
|
if (!bot_name || !bot_desc || !status || !timestamp || !platforms || !contracts) { |
|
|
|
|
data.message = '必填参数name、description、status、platforms、contracts、timestamp不能为空'; |
|
|
|
|
}else if (common.isBotNameOk(bot_name) == false) { |
|
|
|
|
data.message = '爬虫名字必须是6 - 32位英文字母、下划线的组合'; |
|
|
|
|
}else if (typeof(bot_desc) != 'string' || bot_desc.length > 100) { |
|
|
|
@ -82,10 +112,16 @@ router.post('/onboard/', async (req, res) => {
@@ -82,10 +112,16 @@ router.post('/onboard/', async (req, res) => {
|
|
|
|
|
data.message = '爬虫状态status传参错误,其可选值:idle、busy'; |
|
|
|
|
}else if (common.isTimestampInSeconds(timestamp) == false) { |
|
|
|
|
data.message = '时间戳timestamp请传秒数'; |
|
|
|
|
}else if (common.isPlatformsOk(platforms) == false) { |
|
|
|
|
data.message = '支持的平台platforms应为英文逗号间隔的3 - 100个英文字符串'; |
|
|
|
|
}else if (common.isContractsOk(contracts) == false) { |
|
|
|
|
data.message = '支持的合约contracts应为英文逗号间隔的3 - 100个英文字符串'; |
|
|
|
|
}else if (country && common.isIosCountryCode(country) == false) { |
|
|
|
|
data.message = '国家代码country请传小写的两位字母,参考两位ISO CODES:https://countrycode.org/'; |
|
|
|
|
}else if (lang && common.isIosLangCode(lang) == false) { |
|
|
|
|
data.message = '语言代码lang请传小写的两位字母,参考ISO 639-1 Code:https://www.loc.gov/standards/iso639-2/php/code_list.php'; |
|
|
|
|
}else if (contact && common.isContactOk(contact) == false) { |
|
|
|
|
data.message = '联系方式contact应为6 - 50个字符'; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (!data.message) { |
|
|
|
@ -94,6 +130,9 @@ router.post('/onboard/', async (req, res) => {
@@ -94,6 +130,9 @@ router.post('/onboard/', async (req, res) => {
|
|
|
|
|
description: bot_desc, |
|
|
|
|
status: status, |
|
|
|
|
timestamp: timestamp, |
|
|
|
|
platforms: platforms.split(','), |
|
|
|
|
contracts: contracts.split(','), |
|
|
|
|
contact: contact, |
|
|
|
|
//如果没传则填充默认值
|
|
|
|
|
country: country ? country.toLowerCase() : 'cn', |
|
|
|
|
lang: lang ? lang.toLowerCase() : 'zh' |
|
|
|
|