diff --git a/lib/common.mjs b/lib/common.mjs
new file mode 100644
index 0000000..0276923
--- /dev/null
+++ b/lib/common.mjs
@@ -0,0 +1,13 @@
+// Shared helper functions.
+export default {
+
+    /**
+     * Format the current date and time as a locale-specific string.
+     * @param {string} [locales='zh-CN'] - Locale tag passed to Date#toLocaleString.
+     * @returns {string} The current date/time formatted for that locale.
+     */
+    getTimeString(locales = 'zh-CN') {
+        return new Date().toLocaleString(locales);
+    },
+
+};
diff --git a/lib/tajian.mjs b/lib/tajian.mjs
new file mode 100644
index 0000000..5a9c775
--- /dev/null
+++ b/lib/tajian.mjs
@@ -0,0 +1,29 @@
+/**
+ * TaJian integration built on Bot and Machete.
+ * Formats the data returned by the Bot crawlers according to the filestie.io
+ * standard and saves it as .url files together with their description files.
+ */
+
+class TaJian {
+
+    /**
+     * @param {string} data_save_dir - Data output directory (presumably a filesystem path); kept as `this.save_dir`.
+     */
+    constructor(data_save_dir) {
+        this.save_dir = data_save_dir;
+    }
+
+    /**
+     * Intended to save one crawler result as a URL shortcut.
+     * Not implemented yet: the body is empty, so the returned promise resolves
+     * to undefined and `data` is ignored.
+     * @param {*} data - Crawler result to save (shape not defined yet).
+     * @returns {Promise} Resolves to undefined.
+     */
+    async saveUrlShortcut(data) {
+
+    }
+
+}
+
+export default TaJian;
diff --git a/lib/taskMoniter.mjs b/lib/taskMoniter.mjs
new file mode 100644
index 0000000..a8b774b
--- /dev/null
+++ b/lib/taskMoniter.mjs
@@ -0,0 +1,80 @@
+/**
+ * Monitors the crawler task list directory. Responsibilities described by the author:
+ *  - discover new tasks
+ *  - delete the files of finished tasks
+ *  - keep all tasks and their states in memory
+ *  - report the current task status
+ * Most methods below are still empty stubs.
+ */
+import common from './common.mjs';
+import fs from 'node:fs';
+import path from 'node:path';
+import cron from 'node-cron';
+
+class TaskMoniter {
+
+    /**
+     * @param {string} task_list_dir - Directory to monitor, where the task list is saved.
+     */
+    constructor(task_list_dir) {
+        this.check_time_gap = 10; // polling interval, in seconds
+        this.task_dir = task_list_dir; // monitored directory: where the task list is saved
+        this.tasks = {}; // in-memory task list
+        this.taskStatus = { // current task counters
+            total: 0, // total number of tasks
+            waiting: 0, // tasks waiting to run
+            running: 0, // tasks currently running
+            done: 0, // tasks that finished
+            failed: 0 // tasks that failed
+        };
+    }
+
+    /** Not implemented yet (empty body; resolves to undefined). */
+    async getStatus() {
+
+    }
+
+    /** Not implemented yet (empty body; resolves to undefined). */
+    async getNewTask() {
+
+    }
+
+    /** Not implemented yet (empty body; `task` is ignored, resolves to undefined). */
+    async setTaskDone(task) {
+
+    }
+
+    /** Not implemented yet (empty body; `task` is ignored, resolves to undefined). */
+    async setTaskFailed(task) {
+
+    }
+
+    /** Not implemented yet (empty body; resolves to undefined). */
+    async checkTasks() {
+    }
+
+    /**
+     * Start monitoring: logs a start message, then schedules and starts a recurring cron job.
+     * The job callback currently only logs; loading tasks into memory (the stated
+     * goal of this method) is not wired in yet.
+     */
+    run() {
+        console.log('[%s] TaskMoniter started.', common.getTimeString());
+
+        // Auto run: six-field cron expression (leading seconds field), every check_time_gap seconds.
+        const task_check_time = this.check_time_gap;
+        const task_auto_run = cron.schedule(`*/${task_check_time} * * * * *`, () => {
+            console.log('[%s] TaskMoniter auto check...', common.getTimeString());
+
+        }, {
+            scheduled: false
+        });
+
+        task_auto_run.start();
+        console.log('[%s] TaskMoniter auto check started.', common.getTimeString());
+
+    }
+
+}
+
+export default TaskMoniter;
diff --git a/package.json b/package.json
index c426d68..4fb0129 100644
--- a/package.json
+++ b/package.json
@@ -3,6 +3,7 @@
   "type": "module",
   "dependencies": {
     "@ulixee/cloud": "^2.0.0-alpha.24",
-    "@ulixee/hero": "^2.0.0-alpha.24"
+    "@ulixee/hero": "^2.0.0-alpha.24",
+    "node-cron": "^3.0.2"
   }
 }
diff --git a/spider.mjs b/spider.mjs
new file mode 100644
index 0000000..59b5448
--- /dev/null
+++ b/spider.mjs
@@ -0,0 +1,27 @@
+/**
+ * Spider main program.
+ * Meant to watch the task directory for new tasks and automatically crawl
+ * the data, saving it to the data directory.
+ */
+import configs from './config.mjs';
+import TaskMoniter from "./lib/taskMoniter.mjs";
+import TaJian from "./lib/tajian.mjs";
+
+import Douyin from './bot/Douyin.mjs';
+import Kuaishou from './bot/Kuaishou.mjs';
+import Xigua from './bot/Xigua.mjs';
+import Bilibili from './bot/Bilibili.mjs';
+
+(async () => {
+
+    const taskMoniter = new TaskMoniter(configs.task_list_dir);
+    // Created up front but not used yet in this entry point.
+    const tajian = new TaJian(configs.data_save_dir);
+
+    taskMoniter.run();
+
+})().catch(error => {
+    // Any startup failure is logged and the process exits with a non-zero code.
+    console.error("Spider error got:\n%s", error);
+    process.exit(1);
+});