diff --git a/config.mjs b/config.mjs
index 9712212..7ac683d 100644
--- a/config.mjs
+++ b/config.mjs
@@ -4,8 +4,9 @@ export default {
 
     data_save_dir: 'data/',                 //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io
 
-    //herounion对接配置
+    //HeroUnion integration settings
     herounion: {
+        server_url: 'http://127.0.0.1:8080',    //HeroUnion server address
         name: 'machete_hero',                   //爬虫名字
         description: '支持Machete的TaJian皮肤的hero爬虫',  //爬虫简介
         platforms: 'douyin,kuaishou,xigua,bilibili',    //爬虫支持的平台
@@ -13,6 +14,7 @@ export default {
         country: 'cn',                          //爬虫所在国家
         lang: 'zh',                             //爬虫支持的语言
         contact: 'https://filesite.io',         //爬虫的联系方式
+        data_mode: 'json'                       //data format supported by the spider
     },
 
 
diff --git a/lib/heroBot.mjs b/lib/heroBot.mjs
new file mode 100644
index 0000000..bc5b655
--- /dev/null
+++ b/lib/heroBot.mjs
@@ -0,0 +1,186 @@
+/**
+ * HeroUnion Bot SDK
+ *
+ * HTTP client used by the spider to talk to a HeroUnion server: it sends heart
+ * beats, fetches new tasks and posts task results back.
+ */
+
+import test from 'node:test';
+import assert from 'node:assert';
+import axios from 'axios';
+import md5 from 'md5';
+
+
+class HeroBot {
+    /**
+     * @param server_url        Base URL of the HeroUnion server
+     * @param bot_name          Bot name, sent with heart beats and task results
+     * @param bot_description   Bot description, sent with heart beats
+     * @param support_platforms Platforms, sent with heart beats and task queries
+     * @param support_contracts Contracts, sent with heart beats and task queries
+     * @param bot_country       Optional, defaults to 'cn'
+     * @param bot_lang          Optional, defaults to 'zh'
+     * @param bot_contact       Optional, defaults to ''
+     * @param data_mode         Optional, defaults to 'json'
+     */
+    constructor(
+        server_url,
+        bot_name,
+        bot_description,
+        support_platforms,
+        support_contracts,
+        bot_country,
+        bot_lang,
+        bot_contact,
+        data_mode
+    ) {
+        //Required parameters
+        this.union_server = server_url;
+        this.name = bot_name;
+        this.description = bot_description;
+        this.platforms = support_platforms;
+        this.contracts = support_contracts;
+
+        //Optional parameters
+        this.country = typeof(bot_country) != 'undefined' ? bot_country : 'cn';
+        this.lang = typeof(bot_lang) != 'undefined' ? bot_lang : 'zh';
+        this.contact = typeof(bot_contact) != 'undefined' ? bot_contact : '';
+        this.data_mode = typeof(data_mode) != 'undefined' ? data_mode : 'json';
+
+        //HeroUnion API endpoints. Trailing slashes are stripped from the base URL
+        //so that "http://host/" does not yield "http://host//api/...".
+        const api_base = String(server_url).replace(/\/+$/, '');
+        this.apis = {
+            "heartBeat": `${api_base}/api/onboard/`,
+            "getNewTask": `${api_base}/api/gettask/`,
+            "saveTaskData": `${api_base}/api/savetask/`,
+        };
+
+        //Base axios config shared by all requests
+        this.axiosConfig = {
+            timeout: 8000,      //request timeout
+            proxy: false        //whether to go through a proxy
+        };
+    }
+
+    //Current Unix time in whole seconds
+    getTimestampInSeconds() {
+        return Math.floor(Date.now() / 1000);
+    }
+
+    //Copy obj into a new object whose keys are inserted in sorted order
+    sortDict(obj) {
+        return Object.keys(obj).sort().reduce(function(result, key) {
+            result[key] = obj[key];
+            return result;
+        }, {});
+    }
+
+    //MD5 signature of the key-sorted params (as JSON) followed by token
+    sign(params, token) {
+        return md5( JSON.stringify(this.sortDict(params)) + token );
+    }
+
+    /**
+     * Send a heart beat to HeroUnion.
+     * @param status Bot status to report
+     * @returns Response body, or false when the request failed
+     */
+    async heartBeat(status) {
+        let params = {
+            name: this.name,
+            description: this.description,
+            status: status,
+            timestamp: this.getTimestampInSeconds(),
+            platforms: this.platforms,
+            contracts: this.contracts,
+            country: this.country,
+            lang: this.lang,
+            contact: this.contact
+        };
+
+        let response = null;
+
+        try {
+            response = await axios.post(this.apis.heartBeat, params, this.axiosConfig);
+        }catch(err) {
+            console.error('[ERROR] HeroBot heart beat failed: %s, api: %s, params: %s',
+                err,
+                this.apis.heartBeat,
+                JSON.stringify(params)
+            );
+        }
+
+        return response ? response.data : false;
+    }
+
+    /**
+     * Fetch a new task from HeroUnion.
+     * @returns The task when the server answers with code 1, otherwise false
+     */
+    async getNewTask() {
+        let params = {
+            platforms: this.platforms,
+            contracts: this.contracts,
+            data_mode: this.data_mode,
+            country: this.country,
+            lang: this.lang
+        };
+
+        //Build a per-request copy: writing method/url/params onto this.axiosConfig
+        //itself would leak these query params into later heartBeat/saveTaskData posts.
+        let queryOption = Object.assign({}, this.axiosConfig, {
+            method: 'get',
+            url: this.apis.getNewTask,
+            params: params
+        });
+
+        let response = null;
+
+        try {
+            response = await axios(queryOption);
+        }catch(err) {
+            console.error('[ERROR] HeroBot get new task failed: %s, api: %s, params: %s',
+                err,
+                this.apis.getNewTask,
+                JSON.stringify(params)
+            );
+        }
+
+        //Guard against an empty body; loose == is kept so a code of 1 or "1" is accepted
+        return response && response.data && response.data.code == 1 ? response.data.task : false;
+    }
+
+    /**
+     * Send task data back to HeroUnion.
+     * @param task_id    Task id, sent as task_id
+     * @param task_token Token appended to the sorted params when signing
+     * @param task_data  Task result, sent as task_result
+     * @returns Response body, or false when the request failed
+     */
+    async saveTaskData(task_id, task_token, task_data) {
+        let params = {
+            name: this.name,
+            task_id: task_id,
+            task_result: task_data
+        };
+        params.sign = this.sign(params, task_token);    //sign the params
+
+        let response = null;
+
+        try {
+            response = await axios.post(this.apis.saveTaskData, params, this.axiosConfig);
+        }catch(err) {
+            console.error('[ERROR] HeroBot save task data failed: %s, api: %s, params: %s',
+                err,
+                this.apis.saveTaskData,
+                JSON.stringify(params)
+            );
+        }
+
+        return response ? response.data : false;
+    }
+
+}
+
+export default HeroBot;
\ No newline at end of file
diff --git a/lib/taskMoniter.mjs b/lib/taskMoniter.mjs
index 786ed2f..bf158ca 100644
--- a/lib/taskMoniter.mjs
+++ b/lib/taskMoniter.mjs
@@ -9,11 +9,13 @@
  * -------------------
  * task数据结构:{id:'', url: '', status:''}
  */
+import configs from '../config.mjs';
 import common from './common.mjs';
 import fs from 'node:fs';
 import { readdir, readFile } from 'node:fs/promises';
 import path from 'node:path';
 import cron from 'node-cron';
+import HeroBot from "./heroBot.mjs";
 
 class TaskMoniter {
     constructor(task_list_dir) {
@@ -36,6 +38,20 @@ class TaskMoniter {
             done: 'done',
             failed: 'failed',
         };
+
+        //HeroUnion integration
+        let heroUnionConfig = configs.herounion;
+        this.heroBot = new HeroBot(
+            heroUnionConfig.server_url,
+            heroUnionConfig.name,
+            heroUnionConfig.description,
+            heroUnionConfig.platforms,
+            heroUnionConfig.contracts,
+            heroUnionConfig.country,
+            heroUnionConfig.lang,
+            heroUnionConfig.contact,
+            heroUnionConfig.data_mode
+        );
     }
 
     getTaskFilePath(task_id) {
@@ -159,6 +175,15 @@ class TaskMoniter {
                 this.addTask(task);
             }
 
+            //Fetch a task from HeroUnion
+            let unionTask = await this.heroBot.getNewTask();
+            if (unionTask) {
+                console.log('Got new union task %s, url: %s', unionTask.id, unionTask.url);
+                unionTask.status = this.statusCode.waiting;
+                unionTask.from = 'HeroUnion';           //mark this task as coming from HeroUnion
+                this.addTask(unionTask);
+            }
+
             this.checking = false;
         }catch(error) {
             this.checking = false;
diff --git a/package.json b/package.json
index a23aebd..5a6cfe3 100644
--- a/package.json
+++ b/package.json
@@ -5,7 +5,8 @@
     "@ulixee/cloud": "^2.0.0-alpha.24",
     "@ulixee/hero": "^2.0.0-alpha.24",
     "node-cron": "^3.0.2",
-    "axios": "^1.3.3"
+    "axios": "^1.3.3",
+    "md5": "^2.3.0"
   },
   "scripts": {
     "start": "node spider.mjs"
diff --git a/spider.mjs b/spider.mjs
index f9f1f6e..c0598c8 100644
--- a/spider.mjs
+++ b/spider.mjs
@@ -6,6 +6,7 @@ import configs from './config.mjs';
 import common from './lib/common.mjs';
 import TaskMoniter from "./lib/taskMoniter.mjs";
 import TaJian from "./lib/tajian.mjs";
+import HeroBot from "./lib/heroBot.mjs";
 
 import Douyin from './bot/Douyin.mjs';
 import Kuaishou from './bot/Kuaishou.mjs';
@@ -37,18 +38,18 @@ import cron from 'node-cron';
         console.log('New task %s handle by bot %s.', task.url, botName);
         let bot = null;
         switch (botName) {
-        case 'douyin':
-            bot = new Douyin(heroCloudServer);
-            break;
-        case 'kuaishou':
-            bot = new Kuaishou(heroCloudServer);
-            break;
-        case 'xigua':
-            bot = new Xigua(heroCloudServer);
-            break;
-        case 'bilibili':
-            bot = new Bilibili(heroCloudServer);
-            break;
+            case 'douyin':
+                bot = new Douyin(heroCloudServer);
+                break;
+            case 'kuaishou':
+                bot = new Kuaishou(heroCloudServer);
+                break;
+            case 'xigua':
+                bot = new Xigua(heroCloudServer);
+                break;
+            case 'bilibili':
+                bot = new Bilibili(heroCloudServer);
+                break;
         }
 
         if (bot) {
@@ -62,30 +63,52 @@ import cron from 'node-cron';
                 if (
                     await tajian.saveUrlShortcut(task.id, data)
                     && await tajian.saveDescriptionFiles(task.id, data)
-                    ) {
+                ) {
                     taskMoniter.setTaskDone(task.id);
+                }else {
+                    taskMoniter.setTaskFailed(task.id);
+                }
             }else {
                 taskMoniter.setTaskFailed(task.id);
             }
+
+            spider_is_running = false;
         }else {
+            console.error('No bot matched with url %s', task.url);
+            taskMoniter.setTaskRunning(task.id);
             taskMoniter.setTaskFailed(task.id);
         }
-
-        spider_is_running = false;
-    }else {
-        console.error('No bot matched with url %s', task.url);
-        taskMoniter.setTaskRunning(task.id);
-        taskMoniter.setTaskFailed(task.id);
-    }
-}, {
-    scheduled: false
-});
+    }, {
+        scheduled: false
+    });
 
     task_auto_run.start();
 
     console.log('[%s] Spider started.', common.getTimeString());
 
-    //TODO: 对接英雄联盟接口:https://herounion.filesite.io
+    //HeroUnion integration
+    let heroUnionConfig = configs.herounion;
+    let heroBot = new HeroBot(
+        heroUnionConfig.server_url,
+        heroUnionConfig.name,
+        heroUnionConfig.description,
+        heroUnionConfig.platforms,
+        heroUnionConfig.contracts,
+        heroUnionConfig.country,
+        heroUnionConfig.lang,
+        heroUnionConfig.contact,
+        heroUnionConfig.data_mode
+    );
+
+    //Spider heart beat reporting
+    const heartBeatFrequence = 60;      //report once per minute
+    const heroUnionHeartBeat = cron.schedule(`*/${heartBeatFrequence} * * * * *`, async () => {
+        let status = spider_is_running ? 'busy' : 'idle';
+        const res = await heroBot.heartBeat(status);
+        console.log('HeroUnion bot heart beat result', res);
+    }, {scheduled: false});
+    heroUnionHeartBeat.start();
+    console.log('[%s] HeroUnion bot heart beat started.', common.getTimeString());
 })().catch(error => {
     console.error("Spider error got:\n%s", error);
     process.exit(1);