From 71ac38ec7aaa6c6fba727a4a06ea97ce3d5b241e Mon Sep 17 00:00:00 2001 From: filesite Date: Wed, 17 Apr 2024 21:17:40 +0800 Subject: [PATCH] add task fail retry and finally failed report to HeroUnion --- config.mjs | 1 + lib/heroBot.mjs | 5 ++- lib/tajian.mjs | 94 ++++++++++++++++++++++----------------------- lib/taskMoniter.mjs | 8 +++- spider.mjs | 18 ++++++++- 5 files changed, 73 insertions(+), 53 deletions(-) diff --git a/config.mjs b/config.mjs index 9c364c1..3bb39b9 100644 --- a/config.mjs +++ b/config.mjs @@ -5,6 +5,7 @@ let configs = { task_list_dir: 'todo/', //待抓取任务文件保存目录 data_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io + max_fail_retry: 5, //任务失败最多重试次数 //HeroUnion英雄联盟对接配置 herounion: { diff --git a/lib/heroBot.mjs b/lib/heroBot.mjs index bc5b655..e793ab0 100644 --- a/lib/heroBot.mjs +++ b/lib/heroBot.mjs @@ -122,12 +122,15 @@ class HeroBot { } //回传任务数据给联盟 - async saveTaskData(task_id, task_token, task_data) { + async saveTaskData(task_id, task_token, task_data, task_status) { let params = { name: this.name, task_id: task_id, task_result: task_data }; + if (typeof(task_status) != 'undefined') { + params.status = task_status; + } params.sign = this.sign(params, task_token); //对参数进行签名 let response = null; diff --git a/lib/tajian.mjs b/lib/tajian.mjs index 385a702..153c517 100644 --- a/lib/tajian.mjs +++ b/lib/tajian.mjs @@ -9,66 +9,62 @@ import path from 'node:path'; class TaJian { - constructor(data_save_dir) { - this.save_dir = data_save_dir; - } + constructor(data_save_dir) { + this.save_dir = data_save_dir; + } - /* - * Example: - [InternetShortcut] - URL=https://microsoft.com/ - */ - async saveUrlShortcut(filename, data) { - //console.log('TaJian try to save shortcut url'); + /* + * Example: + [InternetShortcut] + URL=https://microsoft.com/ + */ + async saveUrlShortcut(filename, data) { + try { - try { + const dirPath = path.resolve(this.save_dir); + const filepath = `${dirPath}/${filename}.url`; - const dirPath = path.resolve(this.save_dir); - const filepath = `${dirPath}/${filename}.url`; + const shortUrlContent = `\[InternetShortcut\] + URL=${data.url} + `; + await writeFile(filepath, shortUrlContent, { encoding: 'utf8' }); + }catch(error) { + console.error('Save short url file failed: %s', error); + return false; + } - const shortUrlContent = `\[InternetShortcut\] -URL=${data.url} -`; - await writeFile(filepath, shortUrlContent, { encoding: 'utf8' }); - }catch(error) { - console.error('Save short url file failed: %s', error); - return false; + return true; } - return true; - } - - async saveDescriptionFiles(filename, data) { - //console.log('TaJian try to save description files'); - - try { - const dirPath = path.resolve(this.save_dir); - - //save title - let filepath = `${dirPath}/${filename}_title.txt`; - let content = data.title; - await writeFile(filepath, content, { encoding: 'utf8' }); + async saveDescriptionFiles(filename, data) { + try { + const dirPath = path.resolve(this.save_dir); - if (typeof(data.cover_base64) != 'undefined' && data.cover_base64) { - filepath = `${dirPath}/${filename}.${data.cover_type}`; - content = Buffer.from(data.cover_base64, "base64"); //保存图片文件 + //save title + let filepath = `${dirPath}/${filename}_title.txt`; + let content = data.title; await writeFile(filepath, content, { encoding: 'utf8' }); - filepath = `${dirPath}/${filename}_cover.txt`; - content = `${filename}.${data.cover_type}`; //保存图片路径 - await writeFile(filepath, content, { encoding: 'utf8' }); - }else { - filepath = `${dirPath}/${filename}_cover.txt`; - content = data.cover; //保存图片网址 - await writeFile(filepath, content, { encoding: 'utf8' }); + if (typeof(data.cover_base64) != 'undefined' && data.cover_base64) { + filepath = `${dirPath}/${filename}.${data.cover_type}`; + content = Buffer.from(data.cover_base64, "base64"); //保存图片文件 + await writeFile(filepath, content, { encoding: 'utf8' }); + + filepath = `${dirPath}/${filename}_cover.txt`; + content = `${filename}.${data.cover_type}`; //保存图片路径 + await writeFile(filepath, content, { encoding: 'utf8' }); + }else { + filepath = `${dirPath}/${filename}_cover.txt`; + content = data.cover; //保存图片网址 + await writeFile(filepath, content, { encoding: 'utf8' }); + } + }catch(error) { + console.error('Save description files failed: %s', error); + return false; } - }catch(error) { - console.error('Save description files failed: %s', error); - return false; - } - return true; - } + return true; + } } diff --git a/lib/taskMoniter.mjs b/lib/taskMoniter.mjs index 984f2f5..65396dc 100644 --- a/lib/taskMoniter.mjs +++ b/lib/taskMoniter.mjs @@ -104,8 +104,12 @@ class TaskMoniter { this.taskStatus[this.statusCode.done] ++; this.taskStatus[this.statusCode.running] --; - const filepath = this.getTaskFilePath(task_id); - common.removeFile(filepath); //async delete + //如果不是联盟的任务,则把本地任务文件删除 + if (typeof(this.tasks[task_id].from) == 'undefined' || this.tasks[task_id].from != 'HeroUnion') { + const filepath = this.getTaskFilePath(task_id); + common.removeFile(filepath); //async delete + } + return true; } diff --git a/spider.mjs b/spider.mjs index 90b9a85..9a9d1ff 100644 --- a/spider.mjs +++ b/spider.mjs @@ -1,6 +1,8 @@ /** * 爬虫主程序 * 负责监听任务目录里的新任务,并自动抓取数据保存到数据目录。 + * 增加失败任务的重试机制 + * 增加失败任务上报 */ import getConfigs from './config.mjs'; import common from './lib/common.mjs'; @@ -93,7 +95,21 @@ import cron from 'node-cron'; taskMoniter.setTaskFailed(task.id); } }else { - taskMoniter.setTaskFailed(task.id); + //失败后最多重试 5 次 + if (typeof(task.fail_retry) == 'undefined') { + task.fail_retry = 0; + }else { + task.fail_retry ++; + } + + taskMoniter.updateTask(task.id, task); + + if (task.fail_retry > configs.max_fail_retry) { + taskMoniter.setTaskFailed(task.id); + + //上报联盟,任务失败 + heroBot.saveTaskData(task.id, task.token, [], 'failed'); + } } spider_is_running = false;