Browse Source

spider add task run limit

master
filesite 1 year ago
parent
commit
a81f26f815
  1. 9
      spider.mjs

9
spider.mjs

@@ -25,8 +25,11 @@ import cron from 'node-cron';
// WebSocket (ws://) endpoint address; presumably the Hero cloud server the bots connect to — confirm against the code that reads it.
const heroCloudServer = 'ws://192.168.3.13:1818';
// Spider run state: flag checked by the scheduled job so that only one crawl task runs at a time.
let spider_is_running = false;
const task_check_time = 20; // Interval in seconds used in the cron expression: fetch a task every 20 seconds.
const task_auto_run = cron.schedule(`*/${task_check_time} * * * * *`, async () => {
if (spider_is_running == true) {return false;} //避免同时执行多个爬虫任务
const task = taskMoniter.getNewTask();
if (!task) {return false;}
@@ -49,6 +52,8 @@ import cron from 'node-cron';
}
if (bot) {
spider_is_running = true;
taskMoniter.setTaskRunning(task.id);
const data = await bot.scrap(task.url);
console.log('Data got by bot', data);
@@ -56,7 +61,11 @@ import cron from 'node-cron';
if (typeof(data.done) != 'undefined' && data.done == true) {
taskMoniter.setTaskDone(task.id);
await tajian.saveUrlShortcut(data);
}else {
taskMoniter.setTaskFailed(task.id);
}
spider_is_running = false;
}else {
console.error('No bot matched with url %s', task.url);
}

Loading…
Cancel
Save