Browse Source

Add retry for failed tasks and report tasks that finally fail to HeroUnion

master
filesite 9 months ago
parent
commit
71ac38ec7a
  1. 1
      config.mjs
  2. 5
      lib/heroBot.mjs
  3. 94
      lib/tajian.mjs
  4. 8
      lib/taskMoniter.mjs
  5. 18
      spider.mjs

1
config.mjs

@ -5,6 +5,7 @@ let configs = { @@ -5,6 +5,7 @@ let configs = {
task_list_dir: 'todo/', //待抓取任务文件保存目录
data_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io
max_fail_retry: 5, //任务失败最多重试次数
//HeroUnion英雄联盟对接配置
herounion: {

5
lib/heroBot.mjs

@ -122,12 +122,15 @@ class HeroBot { @@ -122,12 +122,15 @@ class HeroBot {
}
//回传任务数据给联盟
async saveTaskData(task_id, task_token, task_data) {
async saveTaskData(task_id, task_token, task_data, task_status) {
let params = {
name: this.name,
task_id: task_id,
task_result: task_data
};
if (typeof(task_status) != 'undefined') {
params.status = task_status;
}
params.sign = this.sign(params, task_token); //对参数进行签名
let response = null;

94
lib/tajian.mjs

@ -9,66 +9,62 @@ import path from 'node:path'; @@ -9,66 +9,62 @@ import path from 'node:path';
class TaJian {
constructor(data_save_dir) {
this.save_dir = data_save_dir;
}
constructor(data_save_dir) {
this.save_dir = data_save_dir;
}
/*
* Example:
[InternetShortcut]
URL=https://microsoft.com/
*/
async saveUrlShortcut(filename, data) {
//console.log('TaJian try to save shortcut url');
/*
* Example:
[InternetShortcut]
URL=https://microsoft.com/
*/
async saveUrlShortcut(filename, data) {
try {
try {
const dirPath = path.resolve(this.save_dir);
const filepath = `${dirPath}/${filename}.url`;
const dirPath = path.resolve(this.save_dir);
const filepath = `${dirPath}/${filename}.url`;
const shortUrlContent = `\[InternetShortcut\]
URL=${data.url}
`;
await writeFile(filepath, shortUrlContent, { encoding: 'utf8' });
}catch(error) {
console.error('Save short url file failed: %s', error);
return false;
}
const shortUrlContent = `\[InternetShortcut\]
URL=${data.url}
`;
await writeFile(filepath, shortUrlContent, { encoding: 'utf8' });
}catch(error) {
console.error('Save short url file failed: %s', error);
return false;
return true;
}
return true;
}
async saveDescriptionFiles(filename, data) {
//console.log('TaJian try to save description files');
try {
const dirPath = path.resolve(this.save_dir);
//save title
let filepath = `${dirPath}/${filename}_title.txt`;
let content = data.title;
await writeFile(filepath, content, { encoding: 'utf8' });
async saveDescriptionFiles(filename, data) {
try {
const dirPath = path.resolve(this.save_dir);
if (typeof(data.cover_base64) != 'undefined' && data.cover_base64) {
filepath = `${dirPath}/${filename}.${data.cover_type}`;
content = Buffer.from(data.cover_base64, "base64"); //保存图片文件
//save title
let filepath = `${dirPath}/${filename}_title.txt`;
let content = data.title;
await writeFile(filepath, content, { encoding: 'utf8' });
filepath = `${dirPath}/${filename}_cover.txt`;
content = `${filename}.${data.cover_type}`; //保存图片路径
await writeFile(filepath, content, { encoding: 'utf8' });
}else {
filepath = `${dirPath}/${filename}_cover.txt`;
content = data.cover; //保存图片网址
await writeFile(filepath, content, { encoding: 'utf8' });
if (typeof(data.cover_base64) != 'undefined' && data.cover_base64) {
filepath = `${dirPath}/${filename}.${data.cover_type}`;
content = Buffer.from(data.cover_base64, "base64"); //保存图片文件
await writeFile(filepath, content, { encoding: 'utf8' });
filepath = `${dirPath}/${filename}_cover.txt`;
content = `${filename}.${data.cover_type}`; //保存图片路径
await writeFile(filepath, content, { encoding: 'utf8' });
}else {
filepath = `${dirPath}/${filename}_cover.txt`;
content = data.cover; //保存图片网址
await writeFile(filepath, content, { encoding: 'utf8' });
}
}catch(error) {
console.error('Save description files failed: %s', error);
return false;
}
}catch(error) {
console.error('Save description files failed: %s', error);
return false;
}
return true;
}
return true;
}
}

8
lib/taskMoniter.mjs

@ -104,8 +104,12 @@ class TaskMoniter { @@ -104,8 +104,12 @@ class TaskMoniter {
this.taskStatus[this.statusCode.done] ++;
this.taskStatus[this.statusCode.running] --;
const filepath = this.getTaskFilePath(task_id);
common.removeFile(filepath); //async delete
//如果不是联盟的任务,则把本地任务文件删除
if (typeof(this.tasks[task_id].from) == 'undefined' || this.tasks[task_id].from != 'HeroUnion') {
const filepath = this.getTaskFilePath(task_id);
common.removeFile(filepath); //async delete
}
return true;
}

18
spider.mjs

@ -1,6 +1,8 @@ @@ -1,6 +1,8 @@
/**
* 爬虫主程序
* 负责监听任务目录里的新任务并自动抓取数据保存到数据目录
* 增加失败任务的重试机制
* 增加失败任务上报
*/
import getConfigs from './config.mjs';
import common from './lib/common.mjs';
@ -93,7 +95,21 @@ import cron from 'node-cron'; @@ -93,7 +95,21 @@ import cron from 'node-cron';
taskMoniter.setTaskFailed(task.id);
}
}else {
taskMoniter.setTaskFailed(task.id);
//失败后最多重试 5 次
if (typeof(task.fail_retry) == 'undefined') {
task.fail_retry = 0;
}else {
task.fail_retry ++;
}
taskMoniter.updateTask(task.id, task);
if (task.fail_retry > configs.max_fail_retry) {
taskMoniter.setTaskFailed(task.id);
//上报联盟,任务失败
heroBot.saveTaskData(task.id, task.token, [], 'failed');
}
}
spider_is_running = false;

Loading…
Cancel
Save