From 265c4e88c926102903418e90ad6f9d4c5670de6c Mon Sep 17 00:00:00 2001
From: filesite
Date: Tue, 16 Apr 2024 18:05:50 +0800
Subject: [PATCH] add notify handler for HeroUnion task data save

Keep the fetched data on the task and let TaskMoniter send it back to
HeroUnion: once right after the spider finishes a task, and then from a
cron job every notify_time_gap minutes, with at most
herounion.notify_max_try attempts per task.

---
Review notes (this section is ignored by git am):
- Indentation and the position of blank context lines were reconstructed;
  check them against the target tree (or apply with --ignore-whitespace).
- The commit id above and the post-image blob ids on the index lines were
  carried over unchanged and do not describe this revised content.

 config.mjs          |  4 +-
 lib/common.mjs      |  5 +++
 lib/taskMoniter.mjs | 98 +++++++++++++++++++++++++++++++++++++++++++++
 spider.mjs          | 15 ++++---
 4 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/config.mjs b/config.mjs
index 3674833..ec09a02 100644
--- a/config.mjs
+++ b/config.mjs
@@ -14,7 +14,9 @@ export default {
         country: 'cn', //爬虫所在国家
         lang: 'zh', //爬虫支持的语言
         contact: 'https://filesite.io', //爬虫的联系方式
-        data_mode: 'json' //爬虫支持的数据格式
+        data_mode: 'json', //data format supported by the spider
+
+        notify_max_try: 5 //max attempts to send a finished task's data back
     },
 
 
diff --git a/lib/common.mjs b/lib/common.mjs
index c7ff63b..4c4866e 100644
--- a/lib/common.mjs
+++ b/lib/common.mjs
@@ -9,6 +9,11 @@ export default {
         return today.toLocaleString(locales);
     },
 
+    //Current Unix time in whole seconds
+    getTimestampInSeconds: function() {
+        return Math.floor(Date.now() / 1000);
+    },
+
     removeFile: async function(filepath) {
         let done = false;
 
diff --git a/lib/taskMoniter.mjs b/lib/taskMoniter.mjs
index 7245191..c0b8566 100644
--- a/lib/taskMoniter.mjs
+++ b/lib/taskMoniter.mjs
@@ -20,7 +20,9 @@ import HeroBot from "./heroBot.mjs";
 class TaskMoniter {
     constructor(task_list_dir) {
         this.check_time_gap = 1; //检测间隔时间,单位:分钟
+        this.notify_time_gap = 5; //interval between notify runs, in minutes
         this.checking = false;
+        this.notifying = false;
 
         this.task_dir = task_list_dir; //监控目录:任务列表保存目录
         this.tasks = {}; //内存中的任务列表
@@ -150,6 +152,18 @@ class TaskMoniter {
         return true;
     }
 
+    //Replace the cached copy of a task; returns false when task_id is not cached
+    updateTask(task_id, task) {
+        if (typeof(this.tasks[task_id]) == 'undefined') {
+            return false;
+        }
+
+        this.tasks[task_id] = task;
+
+        return true;
+    }
+
+    //Check for new data-fetching tasks
     async checkTasks() {
         if (this.checking == true) {
             return;
@@ -191,6 +205,67 @@ class TaskMoniter {
         }
     }
 
+    //Send a finished task's data back to HeroUnion.
+    //Only tasks that came from HeroUnion, already hold fetched data, have not
+    //been acknowledged yet and still have attempts left are sent; only one
+    //send runs at a time.
+    //Returns true when HeroUnion confirmed the save, false otherwise.
+    async notifyHandle(task) {
+        if (typeof(task.from) == 'undefined' || task.from != 'HeroUnion' || this.notifying) {
+            return false;
+        }
+
+        //nothing fetched yet, so there is nothing to send
+        if (typeof(task.data) == 'undefined') {
+            return false;
+        }
+
+        //already acknowledged by HeroUnion
+        if (typeof(task.notified) != 'undefined' && task.notified) {
+            return false;
+        }
+
+        //stop once the configured number of attempts is used up
+        if (typeof(task.notify_time) != 'undefined' && task.notify_time >= configs.herounion.notify_max_try) {
+            return false;
+        }
+
+        //try to send the data; a thrown error counts as a failed attempt and
+        //the in-flight flag is always released so later attempts are not blocked
+        let saveRes = null;
+        this.notifying = true;
+        try {
+            saveRes = await this.heroBot.saveTaskData(task.id, task.token, task.data);
+        } catch (err) {
+            console.error("[%s] Task %s's data save to HeroUnion threw: %s",
+                common.getTimeString(), task.id, err);
+        } finally {
+            this.notifying = false;
+        }
+
+        if (typeof(task.notify_time) != 'undefined') {
+            task.notify_time ++;
+        }else {
+            task.notify_time = 1; //number of attempts so far
+        }
+
+        task.notify_at = common.getTimestampInSeconds(); //timestamp of the last attempt
+
+        //code == 1 in the response means the data was saved; anything else is retried later
+        if (saveRes && saveRes.code == 1) {
+            task.notified = true; //remember that the data was delivered
+            console.log("[%s][%s] Task %s's data save to HeroUnion done",
+                common.getTimeString(), task.notify_time, task.id);
+        }else {
+            console.log("[%s][%s] Task %s's data save to HeroUnion failed, it will try again later.",
+                common.getTimeString(), task.notify_time, task.id);
+        }
+
+        this.updateTask(task.id, task); //write the updated bookkeeping back to the cache
+
+        return task.notified === true;
+    }
+
     run() {
         //开始监控任务目录,把所有任务缓存到内存
         console.log('[%s] TaskMoniter started.', common.getTimeString());
@@ -206,6 +281,29 @@ class TaskMoniter {
 
         task_auto_run.start();
         console.log('[%s] TaskMoniter auto check started.', common.getTimeString());
+
+
+        //periodically send finished task data back to HeroUnion
+        const task_notify_time = this.notify_time_gap;
+        const notify_auto_run = cron.schedule(`*/${task_notify_time} * * * *`, async () => {
+            //this.tasks is a plain object, so search its values; skip tasks that have
+            //no data yet, are already delivered or have used up their attempts, so one
+            //exhausted task cannot keep the others from being sent
+            const max_try = configs.herounion.notify_max_try;
+            let task = Object.values(_self.tasks).find((item) => item.from == 'HeroUnion'
+                && typeof(item.data) != 'undefined'
+                && !item.notified
+                && (typeof(item.notify_time) == 'undefined' || item.notify_time < max_try));
+            if (task) {
+                console.log("[%s] Try to save task %s's data to HeroUnion", common.getTimeString(), task.id);
+                await _self.notifyHandle(task);
+            }
+        }, {
+            scheduled: false
+        });
+
+        notify_auto_run.start();
+        console.log('[%s] TaskMoniter auto notify started.', common.getTimeString());
     }
 }
 
diff --git a/spider.mjs b/spider.mjs
index e86b16e..e728214 100644
--- a/spider.mjs
+++ b/spider.mjs
@@ -75,16 +75,21 @@ import cron from 'node-cron';
         //console.log('Data got by bot', data);
 
         if (typeof(data.done) != 'undefined' && data.done == true) {
+            task.data = data; //keep the fetched data on the task
+            taskMoniter.updateTask(task.id, task);
+
             if (
                 await tajian.saveUrlShortcut(task.id, data)
                 && await tajian.saveDescriptionFiles(task.id, data)
             ) {
-                taskMoniter.setTaskDone(task.id);
+                //send the data back right away, without waiting for the result;
+                //failed attempts are retried by TaskMoniter's scheduled notify
+                taskMoniter.notifyHandle(task).catch((err) => {
+                    console.error("Task %s's data save to HeroUnion failed: %s", task.id, err);
+                });
 
-                //保存数据到HeroUnion联盟
-                if (typeof(task.from) != 'undefined' && task.from == 'HeroUnion') {
-                    heroBot.saveTaskData(task.id, task.token, data);
-                }
+                //mark the task as done
+                taskMoniter.setTaskDone(task.id);
             }else {
                 taskMoniter.setTaskFailed(task.id);
             }