|
|
|
@ -63,9 +63,19 @@ import path from 'node:path';
@@ -63,9 +63,19 @@ import path from 'node:path';
|
|
|
|
|
return false; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Sleep for a random interval so that different spiders' run times are staggered
|
|
|
|
|
let rnd_secods = parseInt(Math.random() * task_check_time); |
|
|
|
|
console.log("Sleep %s seconds...", rnd_secods); |
|
|
|
|
await common.delay(rnd_secods); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
const task = taskMoniter.getNewTask(); |
|
|
|
|
if (!task) {return false;} |
|
|
|
|
|
|
|
|
|
// Mark the spider as having started running a task
|
|
|
|
|
spider_is_running = true; |
|
|
|
|
last_run_time = common.getTimestampInSeconds(); |
|
|
|
|
|
|
|
|
|
let logFile = path.resolve(configs.task_log_dir) + `/tasks_${heroUnionConfig.name}.log`; |
|
|
|
|
await common.saveLog(logFile, JSON.stringify(task) + "\n"); |
|
|
|
|
|
|
|
|
@ -95,9 +105,6 @@ import path from 'node:path';
@@ -95,9 +105,6 @@ import path from 'node:path';
|
|
|
|
|
if (bot) { |
|
|
|
|
console.log('Spider craping...'); |
|
|
|
|
|
|
|
|
|
spider_is_running = true; |
|
|
|
|
last_run_time = common.getTimestampInSeconds(); |
|
|
|
|
|
|
|
|
|
let taskStarted = taskMoniter.setTaskRunning(task.id); |
|
|
|
|
const data = await bot.scrap(task.url); |
|
|
|
|
//console.log('Data got by bot', data);
|
|
|
|
@ -141,6 +148,7 @@ import path from 'node:path';
@@ -141,6 +148,7 @@ import path from 'node:path';
|
|
|
|
|
spider_is_running = false; |
|
|
|
|
}else { |
|
|
|
|
console.error('No bot matched with url %s', task.url); |
|
|
|
|
spider_is_running = false; |
|
|
|
|
taskMoniter.setTaskFailed(task.id); |
|
|
|
|
} |
|
|
|
|
}, { |
|
|
|
@ -154,6 +162,11 @@ import path from 'node:path';
@@ -154,6 +162,11 @@ import path from 'node:path';
|
|
|
|
|
// Spider heartbeat reporting
|
|
|
|
|
const heartBeatFrequence = 5; // Heartbeat interval in minutes (used as the cron minute step): report once every 5 minutes
|
|
|
|
|
const heroUnionHeartBeat = cron.schedule(`*/${heartBeatFrequence} * * * *`, async () => { |
|
|
|
|
// Sleep for a random interval so that different spiders' run times are staggered
|
|
|
|
|
let rnd_secods = parseInt(Math.random() * 60); |
|
|
|
|
console.log("Sleep %s seconds...", rnd_secods); |
|
|
|
|
await common.delay(rnd_secods); |
|
|
|
|
|
|
|
|
|
let status = spider_is_running ? 'busy' : 'idle'; |
|
|
|
|
const res = await heroBot.heartBeat(status); |
|
|
|
|
console.log('HeroUnion bot heart beat result', res); |
|
|
|
|