diff --git a/README.md b/README.md index 0c99b6c..64807f2 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,11 @@ npm install npm start ``` +带参数启动,设置自定义配置文件,覆盖默认的config.mjs +``` +npm start -- config_custom.json +``` + 在目录todo/里创建任务文件,爬虫检测到新任务后自动抓取数据并保存到data/目录下。 diff --git a/bot/Bilibili.mjs b/bot/Bilibili.mjs index 324680f..5af59e8 100644 --- a/bot/Bilibili.mjs +++ b/bot/Bilibili.mjs @@ -1,5 +1,4 @@ import Hero from '@ulixee/hero'; -import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import common from '../lib/common.mjs'; diff --git a/bot/Douyin.mjs b/bot/Douyin.mjs index c060095..8a5e548 100644 --- a/bot/Douyin.mjs +++ b/bot/Douyin.mjs @@ -1,5 +1,4 @@ import Hero from '@ulixee/hero'; -import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import common from '../lib/common.mjs'; diff --git a/bot/HeroBot.mjs b/bot/HeroBot.mjs index ba2f92b..b1f0093 100644 --- a/bot/HeroBot.mjs +++ b/bot/HeroBot.mjs @@ -1,5 +1,4 @@ import Hero from '@ulixee/hero'; -import configs from '../config.mjs'; import fs from 'node:fs'; import path from 'node:path'; import { fileURLToPath } from 'url'; diff --git a/bot/Kuaishou.mjs b/bot/Kuaishou.mjs index fa3cd44..ebb440f 100644 --- a/bot/Kuaishou.mjs +++ b/bot/Kuaishou.mjs @@ -1,5 +1,4 @@ import Hero from '@ulixee/hero'; -import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import common from '../lib/common.mjs'; diff --git a/bot/Xigua.mjs b/bot/Xigua.mjs index 5536862..264b1ff 100644 --- a/bot/Xigua.mjs +++ b/bot/Xigua.mjs @@ -1,5 +1,4 @@ import Hero from '@ulixee/hero'; -import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import common from '../lib/common.mjs'; diff --git a/config.mjs b/config.mjs index ec09a02..746f2e7 100644 --- a/config.mjs +++ b/config.mjs @@ -1,4 +1,6 @@ -export default { +import common from './lib/common.mjs'; + +let configs = { //自动任务相关配置 task_list_dir: 'todo/', //待抓取任务文件保存目录 data_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io @@ -59,3 +61,18 @@ export default { } }; + +async function getConfig() { + //自定义JSON格式配置文件支持 + if (process.argv.length >= 3) { + let configFile = process.argv[2]; + let myConfigs = await common.loadCustomizeConfig(configFile); + if (myConfigs) { + configs = common.mergeConfigs(myConfigs, configs); + } + } + + return configs; +} + +export default getConfig; \ No newline at end of file diff --git a/lib/common.mjs b/lib/common.mjs index 4c4866e..be04798 100644 --- a/lib/common.mjs +++ b/lib/common.mjs @@ -1,5 +1,7 @@ //公用方法 import { rm as removeFile } from 'node:fs/promises'; +import { readdir, readFile } from 'node:fs/promises'; +import path from 'node:path'; export default { @@ -65,4 +67,28 @@ export default { return imgType; }, + loadCustomizeConfig: async function(configFileName) { + let configs = {}; + + try { + let filepath = path.resolve(configFileName); + let content = await readFile(filepath, { encoding: 'utf8' }); + if (content) { + configs = JSON.parse(content); + } + }catch(error) { + console.error('Get config from %s failed: %s', configFileName, error); + } + + return configs; + }, + + mergeConfigs: function(myConfig, configs) { + for (const key in myConfig) { + configs[key] = myConfig[key]; + } + + return configs; + } + }; diff --git a/lib/taskMoniter.mjs b/lib/taskMoniter.mjs index c0b8566..984f2f5 100644 --- a/lib/taskMoniter.mjs +++ b/lib/taskMoniter.mjs @@ -9,7 +9,6 @@ * ------------------- * task数据结构:{id:'', url: '', status:''} */ -import configs from '../config.mjs'; import common from './common.mjs'; import fs from 'node:fs'; import { readdir, readFile } from 'node:fs/promises'; diff --git a/spider.mjs b/spider.mjs index 9ffe3c9..90b9a85 100644 --- a/spider.mjs +++ b/spider.mjs @@ -2,7 +2,7 @@ * 爬虫主程序 * 负责监听任务目录里的新任务,并自动抓取数据保存到数据目录。 */ -import configs from './config.mjs'; +import getConfigs from './config.mjs'; import common from './lib/common.mjs'; import TaskMoniter from "./lib/taskMoniter.mjs"; import TaJian from "./lib/tajian.mjs"; @@ -16,6 +16,8 @@ import Bilibili from './bot/Bilibili.mjs'; import cron from 'node-cron'; (async () => { + //设置configs为全局变量 + global.configs = await getConfigs(); const taskMoniter = new TaskMoniter(configs.task_list_dir); const tajian = new TaJian(configs.data_save_dir);