|
|
@ -25,8 +25,11 @@ import cron from 'node-cron'; |
|
|
|
const heroCloudServer = 'ws://192.168.3.13:1818'; |
|
|
|
const heroCloudServer = 'ws://192.168.3.13:1818'; |
|
|
|
|
|
|
|
|
|
|
|
//spider run
|
|
|
|
//spider run
|
|
|
|
|
|
|
|
let spider_is_running = false; |
|
|
|
const task_check_time = 20; //每 20 秒抓取一次
|
|
|
|
const task_check_time = 20; //每 20 秒抓取一次
|
|
|
|
const task_auto_run = cron.schedule(`*/${task_check_time} * * * * *`, async () => { |
|
|
|
const task_auto_run = cron.schedule(`*/${task_check_time} * * * * *`, async () => { |
|
|
|
|
|
|
|
if (spider_is_running == true) {return false;} //避免同时执行多个爬虫任务
|
|
|
|
|
|
|
|
|
|
|
|
const task = taskMoniter.getNewTask(); |
|
|
|
const task = taskMoniter.getNewTask(); |
|
|
|
if (!task) {return false;} |
|
|
|
if (!task) {return false;} |
|
|
|
|
|
|
|
|
|
|
@ -49,6 +52,8 @@ import cron from 'node-cron'; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (bot) { |
|
|
|
if (bot) { |
|
|
|
|
|
|
|
spider_is_running = true; |
|
|
|
|
|
|
|
|
|
|
|
taskMoniter.setTaskRunning(task.id); |
|
|
|
taskMoniter.setTaskRunning(task.id); |
|
|
|
const data = await bot.scrap(task.url); |
|
|
|
const data = await bot.scrap(task.url); |
|
|
|
console.log('Data got by bot', data); |
|
|
|
console.log('Data got by bot', data); |
|
|
@ -56,7 +61,11 @@ import cron from 'node-cron'; |
|
|
|
if (typeof(data.done) != 'undefined' && data.done == true) { |
|
|
|
if (typeof(data.done) != 'undefined' && data.done == true) { |
|
|
|
taskMoniter.setTaskDone(task.id); |
|
|
|
taskMoniter.setTaskDone(task.id); |
|
|
|
await tajian.saveUrlShortcut(data); |
|
|
|
await tajian.saveUrlShortcut(data); |
|
|
|
|
|
|
|
}else { |
|
|
|
|
|
|
|
taskMoniter.setTaskFailed(task.id); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
spider_is_running = false; |
|
|
|
}else { |
|
|
|
}else { |
|
|
|
console.error('No bot matched with url %s', task.url); |
|
|
|
console.error('No bot matched with url %s', task.url); |
|
|
|
} |
|
|
|
} |
|
|
|