Browse Source

add complete task auto notify cronjob, v0.1 done

master v0.1
filesite 9 months ago
parent
commit
76661a0cb8
  1. 11
      README.md
  2. 2
      common.mjs
  3. 3
      conf/config.json
  4. 90
      heroUnion.mjs
  5. 6
      index.mjs
  6. 2
      test/heroUnion.test.mjs

11
README.md

@ -1,11 +1,10 @@ @@ -1,11 +1,10 @@
# Hero Union - 英雄联盟
Union of hero bots.
一个Hero的爬虫联盟。
Union of hero bots,一个Hero的爬虫联盟。
Hero Union主要做两件事:
* 加入联盟的爬虫定期到联盟领取网页抓取任务,并将任务结果回传
* 加入联盟的爬虫定期到联盟领取网页抓取任务,并将任务结果回传
* 对外提供提交网页抓取任务和获取任务结果的接口供联盟成员使用,并支持任务完成回调通知
@ -41,7 +40,7 @@ Hero Union主要做两件事: @@ -41,7 +40,7 @@ Hero Union主要做两件事:
### 联盟的爬虫工作流程
1. 本地启动爬虫后,定期向联盟上报爬虫状态
2. 爬虫定期联盟领取新的网页抓取任务
2. 爬虫定期联盟领取新的网页抓取任务
3. 爬虫完成网页抓取任务时调用接口上报给联盟
@ -272,9 +271,7 @@ var sign = md5( JSON.stringify(sortedParams) + token ); @@ -272,9 +271,7 @@ var sign = md5( JSON.stringify(sortedParams) + token );
## Hero Union英雄联盟开发进度
更新日期:2024-4-10
* v0.1 - beta 开发中,完成进度 80% 左右
* v0.1 - beta 2024-04-11 已完成
## 其它参考

2
common.mjs

@ -154,7 +154,7 @@ class Common { @@ -154,7 +154,7 @@ class Common {
//检查url是否符合要求
isUrlOk(url) {
return /^http(s)?:\/\/[\w\.\/]{6,100}$/i.test(url);
return /^http(s)?:\/\/[\w\.\/:]{6,100}$/i.test(url);
}
//检查uuid是否符合要求:6-32位的英文字符串

3
conf/config.json

@ -12,9 +12,12 @@ @@ -12,9 +12,12 @@
"reloadConfigFrequence": 60,
"heroHeartCheckFrequence": 60,
"autoCleanTaskFrequence": 300,
"autoNotifyTaskFrequence": 20,
"max_list_hero_num": 1000,
"axios_proxy": false,
"tokens": {
"herounion_demo": "hello#world!"
}

90
heroUnion.mjs

@ -38,6 +38,7 @@ class HeroUnion { @@ -38,6 +38,7 @@ class HeroUnion {
this.notify_max_try = 5; //回调通知最多尝试次数
this.heroHeartTimeout = 600; //爬虫心跳超时时长,单位:秒
this.max_list_hero_num = 1000; //在接口getHeros()里最多返回的爬虫数量
this.axios_proxy = false; //axios库发送请求时是否使用系统代理
this.stats = {
start_time: common.getTimestampInSeconds()
@ -110,6 +111,10 @@ class HeroUnion { @@ -110,6 +111,10 @@ class HeroUnion {
if (typeof(this.config.max_list_hero_num) != 'undefined' && this.config.max_list_hero_num) {
this.max_list_hero_num = this.config.max_list_hero_num; //最大返回爬虫数量
}
if (typeof(this.config.axios_proxy) != 'undefined' && this.config.axios_proxy) {
this.axios_proxy = this.config.axios_proxy;
}
}
return this.config;
@ -298,47 +303,46 @@ class HeroUnion { @@ -298,47 +303,46 @@ class HeroUnion {
}
//任务完成触发回调通知
async handleTaskDone(id) {
async handleTaskDone(task) {
let notified = false;
let task = this.getTaskById(id);
let notify_url = task.notify_url;
try {
if (notify_url && /^http(s)?:\/\/[\w\.]+/i.test(notify_url)) {
//检查任务通知次数是否达到最大尝试次数
if (task.notify_time > this.notify_max_try) {
common.error('[LIMITED] Task %s notify time has reach the max try number %s',
task.id, this.notify_max_try);
return false;
}
let params = {
"task_id": task.id,
"task_result": task.results,
"timestamp": common.getTimestamp(),
};
let token = await this.getUserToken(task.uuid);
params.sign = common.sign(params, token);
const response = await axios.post(notify_url, params, {timeout: this.notify_timeout*1000});
if (response.status == 200) {
notified = true;
}else {
common.error('[FAILED] Notify to %s failed, response status: %s, status text: %s, result: %s',
notify_url, response.status, response.statusText, response.data);
}
if (!notify_url || common.isUrlOk(notify_url) == false) {
return false;
}
//更新任务notified状态以及notify_time通知次数
let taskIndex = this.tasks.findIndex((item) => item.id == task.id);
if (taskIndex) {
this.tasks[taskIndex].notified = notified;
this.tasks[taskIndex].notify_time ++;
}
try {
common.log('[%s] Try to notify task %s via %s', task.notify_time, task.id, notify_url);
let params = {
"task_id": task.id,
"task_result": task.results,
"timestamp": common.getTimestamp(),
};
let token = await this.getUserToken(task.uuid);
params.sign = common.sign(params, token);
const response = await axios.post(notify_url, params, {
timeout: this.notify_timeout*1000,
proxy: this.axios_proxy
});
if (response.status == 200) {
notified = true;
common.log('Task %s notify to %s done, response data:', task.id, notify_url, response.data);
}else {
common.error('[FAILED] Notify to %s failed, response status: %s, status text: %s, result: %s',
notify_url, response.status, response.statusText, response.data);
}
}catch(err) {
common.error('[ERROR] Notify to %s failed: %s', notify_url, err);
}
//更新任务notified状态以及notify_time通知次数
let taskIndex = this.tasks.findIndex((item) => item.id == task.id);
if (taskIndex) {
this.tasks[taskIndex].notified = notified;
this.tasks[taskIndex].notify_time ++;
}
return notified;
}
@ -464,6 +468,25 @@ class HeroUnion { @@ -464,6 +468,25 @@ class HeroUnion {
common.log('Cronjob of auto clean expired tasks started.');
}
//定期尝试给已完成状态的任务notify_url发送通知回调
autoNotifyTasks() {
let _self = this;
const frequence = typeof(this.config.autoNotifyTaskFrequence) != 'undefined'
&& this.config.autoNotifyTaskFrequence ? this.config.autoNotifyTaskFrequence : 120; //2 分钟检查一次
const cronjob = cron.schedule(`*/${frequence} * * * * *`, () => {
let task = _self.tasks.find((item) => item.status == 'done' && item.notified == false && item.notify_time < _self.notify_max_try);
if (task) {
_self.handleTaskDone(task);
}
}, {
scheduled: false
});
cronjob.start();
common.log('Cronjob of auto notify done tasks started.');
}
//获取联盟状态
getStats() {
this.stats.taskStatus = this.taskStatus;
@ -523,6 +546,7 @@ class HeroUnion { @@ -523,6 +546,7 @@ class HeroUnion {
this.autoReloadConfigs();
this.heroHeartCheck();
this.autoCleanExpiredTasks();
this.autoNotifyTasks();
}
}

6
index.mjs

@ -26,6 +26,12 @@ app.get('/', (req, res) => { @@ -26,6 +26,12 @@ app.get('/', (req, res) => {
return res.send('Welcome to Hero Union of filesite.io');
});
app.post('/test', (req, res) => {
console.log('Post data got in /test', req.body);
return res.status(200).send('Done');
});
//error handler
app.use((err, req, res, next) => {
if (res.headersSent) {

2
test/heroUnion.test.mjs

@ -94,7 +94,7 @@ test('HeroUnion task query test', async (t) => { @@ -94,7 +94,7 @@ test('HeroUnion task query test', async (t) => {
data_mode: 'json',
country: 'cn',
lang: 'zh',
notify_url: 'https://tajian.tv/test/'
notify_url: 'http://127.0.0.1:8080/test/'
};
let token = 'hello#world!';
params.sign = common.sign(params, token);

Loading…
Cancel
Save