diff --git a/README.md b/README.md index b137448..6e247d7 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,10 @@ # Hero Union - 英雄联盟 -Union of hero bots. -一个Hero的爬虫联盟。 +Union of hero bots,一个Hero的爬虫联盟。 Hero Union主要做两件事: -* 加入联盟的爬虫将定期到联盟领取网页抓取任务,并将任务结果回传 +* 加入联盟的爬虫定期到联盟领取网页抓取任务,并将任务结果回传 * 对外提供提交网页抓取任务和获取任务结果的接口供联盟成员使用,并支持任务完成回调通知 @@ -41,7 +40,7 @@ Hero Union主要做两件事: ### 联盟的爬虫工作流程 1. 本地启动爬虫后,定期向联盟上报爬虫状态 -2. 爬虫定期向联盟领取新的网页抓取任务 +2. 爬虫定期从联盟领取新的网页抓取任务 3. 爬虫完成网页抓取任务时调用接口上报给联盟 @@ -272,9 +271,7 @@ var sign = md5( JSON.stringify(sortedParams) + token ); ## Hero Union英雄联盟开发进度 -更新日期:2024-4-10 - -* v0.1 - beta 开发中,完成进度 80% 左右 +* v0.1 - beta 2024-04-11 已完成 ## 其它参考 diff --git a/common.mjs b/common.mjs index acb9d8a..ec4c700 100644 --- a/common.mjs +++ b/common.mjs @@ -154,7 +154,7 @@ class Common { //检查url是否符合要求 isUrlOk(url) { - return /^http(s)?:\/\/[\w\.\/]{6,100}$/i.test(url); + return /^http(s)?:\/\/[\w\.\/:]{6,100}$/i.test(url); } //检查uuid是否符合要求:6-32位的英文字符串 diff --git a/conf/config.json b/conf/config.json index a35730d..b2a39b9 100644 --- a/conf/config.json +++ b/conf/config.json @@ -12,9 +12,12 @@ "reloadConfigFrequence": 60, "heroHeartCheckFrequence": 60, "autoCleanTaskFrequence": 300, + "autoNotifyTaskFrequence": 20, "max_list_hero_num": 1000, + "axios_proxy": false, + "tokens": { "herounion_demo": "hello#world!" } diff --git a/heroUnion.mjs b/heroUnion.mjs index 67fe6cd..1edcf3d 100644 --- a/heroUnion.mjs +++ b/heroUnion.mjs @@ -38,6 +38,7 @@ class HeroUnion { this.notify_max_try = 5; //回调通知最多尝试次数 this.heroHeartTimeout = 600; //爬虫心跳超时时长,单位:秒 this.max_list_hero_num = 1000; //在接口getHeros()里最多返回的爬虫数量 + this.axios_proxy = false; //axios库发送请求时是否使用系统代理 this.stats = { start_time: common.getTimestampInSeconds() @@ -110,6 +111,10 @@ class HeroUnion { if (typeof(this.config.max_list_hero_num) != 'undefined' && this.config.max_list_hero_num) { this.max_list_hero_num = this.config.max_list_hero_num; //最大返回爬虫数量 } + + if (typeof(this.config.axios_proxy) != 'undefined' && this.config.axios_proxy) { + this.axios_proxy = this.config.axios_proxy; + } } return this.config; @@ -298,47 +303,46 @@ class HeroUnion { } //任务完成触发回调通知 - async handleTaskDone(id) { + async handleTaskDone(task) { let notified = false; - - let task = this.getTaskById(id); let notify_url = task.notify_url; - - try { - if (notify_url && /^http(s)?:\/\/[\w\.]+/i.test(notify_url)) { - //检查任务通知次数是否达到最大尝试次数 - if (task.notify_time > this.notify_max_try) { - common.error('[LIMITED] Task %s notify time has reach the max try number %s', - task.id, this.notify_max_try); - return false; - } - - let params = { - "task_id": task.id, - "task_result": task.results, - "timestamp": common.getTimestamp(), - }; - let token = await this.getUserToken(task.uuid); - params.sign = common.sign(params, token); - const response = await axios.post(notify_url, params, {timeout: this.notify_timeout*1000}); - if (response.status == 200) { - notified = true; - }else { - common.error('[FAILED] Notify to %s failed, response status: %s, status text: %s, result: %s', - notify_url, response.status, response.statusText, response.data); - } + if (!notify_url || common.isUrlOk(notify_url) == false) { + return false; + } - //更新任务notified状态以及notify_time通知次数 - let taskIndex = this.tasks.findIndex((item) => item.id == task.id); - if (taskIndex) { - this.tasks[taskIndex].notified = notified; - this.tasks[taskIndex].notify_time ++; - } + try { + common.log('[%s] Try to notify task %s via %s', task.notify_time, task.id, notify_url); + + let params = { + "task_id": task.id, + "task_result": task.results, + "timestamp": common.getTimestamp(), + }; + let token = await this.getUserToken(task.uuid); + params.sign = common.sign(params, token); + + const response = await axios.post(notify_url, params, { + timeout: this.notify_timeout*1000, + proxy: this.axios_proxy + }); + if (response.status == 200) { + notified = true; + common.log('Task %s notify to %s done, response data:', task.id, notify_url, response.data); + }else { + common.error('[FAILED] Notify to %s failed, response status: %s, status text: %s, result: %s', + notify_url, response.status, response.statusText, response.data); } }catch(err) { common.error('[ERROR] Notify to %s failed: %s', notify_url, err); } + //更新任务notified状态以及notify_time通知次数 + let taskIndex = this.tasks.findIndex((item) => item.id == task.id); + if (taskIndex) { + this.tasks[taskIndex].notified = notified; + this.tasks[taskIndex].notify_time ++; + } + return notified; } @@ -464,6 +468,25 @@ class HeroUnion { common.log('Cronjob of auto clean expired tasks started.'); } + //定期尝试给已完成状态的任务notify_url发送通知回调 + autoNotifyTasks() { + let _self = this; + + const frequence = typeof(this.config.autoNotifyTaskFrequence) != 'undefined' + && this.config.autoNotifyTaskFrequence ? this.config.autoNotifyTaskFrequence : 120; //2 分钟检查一次 + const cronjob = cron.schedule(`*/${frequence} * * * * *`, () => { + let task = _self.tasks.find((item) => item.status == 'done' && item.notified == false && item.notify_time < _self.notify_max_try); + if (task) { + _self.handleTaskDone(task); + } + }, { + scheduled: false + }); + + cronjob.start(); + common.log('Cronjob of auto notify done tasks started.'); + } + //获取联盟状态 getStats() { this.stats.taskStatus = this.taskStatus; @@ -523,6 +546,7 @@ class HeroUnion { this.autoReloadConfigs(); this.heroHeartCheck(); this.autoCleanExpiredTasks(); + this.autoNotifyTasks(); } } diff --git a/index.mjs b/index.mjs index 1b425c7..4fc8eaf 100644 --- a/index.mjs +++ b/index.mjs @@ -26,6 +26,12 @@ app.get('/', (req, res) => { return res.send('Welcome to Hero Union of filesite.io'); }); +app.post('/test', (req, res) => { + console.log('Post data got in /test', req.body); + + return res.status(200).send('Done'); +}); + //error handler app.use((err, req, res, next) => { if (res.headersSent) { diff --git a/test/heroUnion.test.mjs b/test/heroUnion.test.mjs index 2b6740b..4dedef1 100644 --- a/test/heroUnion.test.mjs +++ b/test/heroUnion.test.mjs @@ -94,7 +94,7 @@ test('HeroUnion task query test', async (t) => { data_mode: 'json', country: 'cn', lang: 'zh', - notify_url: 'https://tajian.tv/test/' + notify_url: 'http://127.0.0.1:8080/test/' }; let token = 'hello#world!'; params.sign = common.sign(params, token);