From a81f26f8157a3269349338cf3076343bcbdcf636 Mon Sep 17 00:00:00 2001 From: filesite Date: Fri, 15 Sep 2023 17:02:00 +0800 Subject: [PATCH] spider add task run limit --- spider.mjs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/spider.mjs b/spider.mjs index 23f2049..7dd3648 100644 --- a/spider.mjs +++ b/spider.mjs @@ -25,8 +25,11 @@ import cron from 'node-cron'; const heroCloudServer = 'ws://192.168.3.13:1818'; //spider run + let spider_is_running = false; const task_check_time = 20; //每 20 秒抓取一次 const task_auto_run = cron.schedule(`*/${task_check_time} * * * * *`, async () => { + if (spider_is_running == true) {return false;} //避免同时执行多个爬虫任务 + const task = taskMoniter.getNewTask(); if (!task) {return false;} @@ -49,6 +52,8 @@ import cron from 'node-cron'; } if (bot) { + spider_is_running = true; + taskMoniter.setTaskRunning(task.id); const data = await bot.scrap(task.url); console.log('Data got by bot', data); @@ -56,7 +61,11 @@ import cron from 'node-cron'; if (typeof(data.done) != 'undefined' && data.done == true) { taskMoniter.setTaskDone(task.id); await tajian.saveUrlShortcut(data); + }else { + taskMoniter.setTaskFailed(task.id); } + + spider_is_running = false; }else { console.error('No bot matched with url %s', task.url); }