Browse Source

add complete task auto notify cronjob, v0.1 done

master v0.1
filesite 8 months ago
parent
commit
76661a0cb8
  1. 11
      README.md
  2. 2
      common.mjs
  3. 3
      conf/config.json
  4. 54
      heroUnion.mjs
  5. 6
      index.mjs
  6. 2
      test/heroUnion.test.mjs

11
README.md

@ -1,11 +1,10 @@
# Hero Union - 英雄联盟 # Hero Union - 英雄联盟
Union of hero bots. Union of hero bots,一个Hero的爬虫联盟。
一个Hero的爬虫联盟。
Hero Union主要做两件事: Hero Union主要做两件事:
* 加入联盟的爬虫定期到联盟领取网页抓取任务,并将任务结果回传 * 加入联盟的爬虫定期到联盟领取网页抓取任务,并将任务结果回传
* 对外提供提交网页抓取任务和获取任务结果的接口供联盟成员使用,并支持任务完成回调通知 * 对外提供提交网页抓取任务和获取任务结果的接口供联盟成员使用,并支持任务完成回调通知
@ -41,7 +40,7 @@ Hero Union主要做两件事:
### 联盟的爬虫工作流程 ### 联盟的爬虫工作流程
1. 本地启动爬虫后,定期向联盟上报爬虫状态 1. 本地启动爬虫后,定期向联盟上报爬虫状态
2. 爬虫定期到联盟领取新的网页抓取任务 2. 爬虫定期到联盟领取新的网页抓取任务
3. 爬虫完成网页抓取任务时调用接口上报给联盟 3. 爬虫完成网页抓取任务时调用接口上报给联盟
@ -272,9 +271,7 @@ var sign = md5( JSON.stringify(sortedParams) + token );
## Hero Union英雄联盟开发进度 ## Hero Union英雄联盟开发进度
更新日期:2024-4-10 * v0.1 - beta 2024-04-11 已完成
* v0.1 - beta 开发中,完成进度 80% 左右
## 其它参考 ## 其它参考

2
common.mjs

@ -154,7 +154,7 @@ class Common {
//检查url是否符合要求 //检查url是否符合要求
isUrlOk(url) { isUrlOk(url) {
return /^http(s)?:\/\/[\w\.\/]{6,100}$/i.test(url); return /^http(s)?:\/\/[\w\.\/:]{6,100}$/i.test(url);
} }
//检查uuid是否符合要求:6-32位的英文字符串 //检查uuid是否符合要求:6-32位的英文字符串

3
conf/config.json

@ -12,9 +12,12 @@
"reloadConfigFrequence": 60, "reloadConfigFrequence": 60,
"heroHeartCheckFrequence": 60, "heroHeartCheckFrequence": 60,
"autoCleanTaskFrequence": 300, "autoCleanTaskFrequence": 300,
"autoNotifyTaskFrequence": 20,
"max_list_hero_num": 1000, "max_list_hero_num": 1000,
"axios_proxy": false,
"tokens": { "tokens": {
"herounion_demo": "hello#world!" "herounion_demo": "hello#world!"
} }

54
heroUnion.mjs

@ -38,6 +38,7 @@ class HeroUnion {
this.notify_max_try = 5; //回调通知最多尝试次数 this.notify_max_try = 5; //回调通知最多尝试次数
this.heroHeartTimeout = 600; //爬虫心跳超时时长,单位:秒 this.heroHeartTimeout = 600; //爬虫心跳超时时长,单位:秒
this.max_list_hero_num = 1000; //在接口getHeros()里最多返回的爬虫数量 this.max_list_hero_num = 1000; //在接口getHeros()里最多返回的爬虫数量
this.axios_proxy = false; //axios库发送请求时是否使用系统代理
this.stats = { this.stats = {
start_time: common.getTimestampInSeconds() start_time: common.getTimestampInSeconds()
@ -110,6 +111,10 @@ class HeroUnion {
if (typeof(this.config.max_list_hero_num) != 'undefined' && this.config.max_list_hero_num) { if (typeof(this.config.max_list_hero_num) != 'undefined' && this.config.max_list_hero_num) {
this.max_list_hero_num = this.config.max_list_hero_num; //最大返回爬虫数量 this.max_list_hero_num = this.config.max_list_hero_num; //最大返回爬虫数量
} }
if (typeof(this.config.axios_proxy) != 'undefined' && this.config.axios_proxy) {
this.axios_proxy = this.config.axios_proxy;
}
} }
return this.config; return this.config;
@ -298,21 +303,16 @@ class HeroUnion {
} }
//任务完成触发回调通知 //任务完成触发回调通知
async handleTaskDone(id) { async handleTaskDone(task) {
let notified = false; let notified = false;
let task = this.getTaskById(id);
let notify_url = task.notify_url; let notify_url = task.notify_url;
if (!notify_url || common.isUrlOk(notify_url) == false) {
try {
if (notify_url && /^http(s)?:\/\/[\w\.]+/i.test(notify_url)) {
//检查任务通知次数是否达到最大尝试次数
if (task.notify_time > this.notify_max_try) {
common.error('[LIMITED] Task %s notify time has reach the max try number %s',
task.id, this.notify_max_try);
return false; return false;
} }
try {
common.log('[%s] Try to notify task %s via %s', task.notify_time, task.id, notify_url);
let params = { let params = {
"task_id": task.id, "task_id": task.id,
"task_result": task.results, "task_result": task.results,
@ -320,13 +320,21 @@ class HeroUnion {
}; };
let token = await this.getUserToken(task.uuid); let token = await this.getUserToken(task.uuid);
params.sign = common.sign(params, token); params.sign = common.sign(params, token);
const response = await axios.post(notify_url, params, {timeout: this.notify_timeout*1000});
const response = await axios.post(notify_url, params, {
timeout: this.notify_timeout*1000,
proxy: this.axios_proxy
});
if (response.status == 200) { if (response.status == 200) {
notified = true; notified = true;
common.log('Task %s notify to %s done, response data:', task.id, notify_url, response.data);
}else { }else {
common.error('[FAILED] Notify to %s failed, response status: %s, status text: %s, result: %s', common.error('[FAILED] Notify to %s failed, response status: %s, status text: %s, result: %s',
notify_url, response.status, response.statusText, response.data); notify_url, response.status, response.statusText, response.data);
} }
}catch(err) {
common.error('[ERROR] Notify to %s failed: %s', notify_url, err);
}
//更新任务notified状态以及notify_time通知次数 //更新任务notified状态以及notify_time通知次数
let taskIndex = this.tasks.findIndex((item) => item.id == task.id); let taskIndex = this.tasks.findIndex((item) => item.id == task.id);
@ -334,10 +342,6 @@ class HeroUnion {
this.tasks[taskIndex].notified = notified; this.tasks[taskIndex].notified = notified;
this.tasks[taskIndex].notify_time ++; this.tasks[taskIndex].notify_time ++;
} }
}
}catch(err) {
common.error('[ERROR] Notify to %s failed: %s', notify_url, err);
}
return notified; return notified;
} }
@ -464,6 +468,25 @@ class HeroUnion {
common.log('Cronjob of auto clean expired tasks started.'); common.log('Cronjob of auto clean expired tasks started.');
} }
//定期尝试给已完成状态的任务notify_url发送通知回调
autoNotifyTasks() {
let _self = this;
const frequence = typeof(this.config.autoNotifyTaskFrequence) != 'undefined'
&& this.config.autoNotifyTaskFrequence ? this.config.autoNotifyTaskFrequence : 120; //2 分钟检查一次
const cronjob = cron.schedule(`*/${frequence} * * * * *`, () => {
let task = _self.tasks.find((item) => item.status == 'done' && item.notified == false && item.notify_time < _self.notify_max_try);
if (task) {
_self.handleTaskDone(task);
}
}, {
scheduled: false
});
cronjob.start();
common.log('Cronjob of auto notify done tasks started.');
}
//获取联盟状态 //获取联盟状态
getStats() { getStats() {
this.stats.taskStatus = this.taskStatus; this.stats.taskStatus = this.taskStatus;
@ -523,6 +546,7 @@ class HeroUnion {
this.autoReloadConfigs(); this.autoReloadConfigs();
this.heroHeartCheck(); this.heroHeartCheck();
this.autoCleanExpiredTasks(); this.autoCleanExpiredTasks();
this.autoNotifyTasks();
} }
} }

6
index.mjs

@ -26,6 +26,12 @@ app.get('/', (req, res) => {
return res.send('Welcome to Hero Union of filesite.io'); return res.send('Welcome to Hero Union of filesite.io');
}); });
//Test endpoint: logs the posted body and answers 200 "Done".
//The test suite in this commit points a task's notify_url at a local /test/ URL.
app.post('/test', (request, response) => {
    const payload = request.body;
    console.log('Post data got in /test', payload);

    response.status(200);
    return response.send('Done');
});
//error handler //error handler
app.use((err, req, res, next) => { app.use((err, req, res, next) => {
if (res.headersSent) { if (res.headersSent) {

2
test/heroUnion.test.mjs

@ -94,7 +94,7 @@ test('HeroUnion task query test', async (t) => {
data_mode: 'json', data_mode: 'json',
country: 'cn', country: 'cn',
lang: 'zh', lang: 'zh',
notify_url: 'https://tajian.tv/test/' notify_url: 'http://127.0.0.1:8080/test/'
}; };
let token = 'hello#world!'; let token = 'hello#world!';
params.sign = common.sign(params, token); params.sign = common.sign(params, token);

Loading…
Cancel
Save