Browse Source

add custom config support

master
filesite 7 months ago
parent
commit
0ccad60a56
  1. 5
      README.md
  2. 1
      bot/Bilibili.mjs
  3. 1
      bot/Douyin.mjs
  4. 1
      bot/HeroBot.mjs
  5. 1
      bot/Kuaishou.mjs
  6. 1
      bot/Xigua.mjs
  7. 19
      config.mjs
  8. 26
      lib/common.mjs
  9. 1
      lib/taskMoniter.mjs
  10. 4
      spider.mjs

5
README.md

@ -57,6 +57,11 @@ npm install @@ -57,6 +57,11 @@ npm install
npm start
```
带参数启动,设置自定义配置文件,覆盖默认的config.mjs
```
npm start -- config_custom.json
```
在目录todo/里创建任务文件,爬虫检测到新任务后自动抓取数据并保存到data/目录下。

1
bot/Bilibili.mjs

@ -1,5 +1,4 @@ @@ -1,5 +1,4 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';

1
bot/Douyin.mjs

@ -1,5 +1,4 @@ @@ -1,5 +1,4 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';

1
bot/HeroBot.mjs

@ -1,5 +1,4 @@ @@ -1,5 +1,4 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'url';

1
bot/Kuaishou.mjs

@ -1,5 +1,4 @@ @@ -1,5 +1,4 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';

1
bot/Xigua.mjs

@ -1,5 +1,4 @@ @@ -1,5 +1,4 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';

19
config.mjs

@ -1,4 +1,6 @@ @@ -1,4 +1,6 @@
export default {
import common from './lib/common.mjs';
let configs = {
//自动任务相关配置
task_list_dir: 'todo/', //待抓取任务文件保存目录
data_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io
@ -59,3 +61,18 @@ export default { @@ -59,3 +61,18 @@ export default {
}
};
/**
 * Resolve the configuration object used by the application.
 *
 * When the process was started with at least one extra command-line
 * argument, that argument is passed to common.loadCustomizeConfig and the
 * loaded values are merged into the module-level defaults via
 * common.mergeConfigs.
 *
 * @returns {Promise<object>} the module-level configs object, after any merge
 */
async function getConfig() {
    // No extra CLI argument: keep the built-in defaults as they are.
    if (process.argv.length < 3) {
        return configs;
    }

    // Custom JSON config file support: argv[2] names the file to load.
    const customConfigFile = process.argv[2];
    const overrides = await common.loadCustomizeConfig(customConfigFile);
    if (overrides) {
        configs = common.mergeConfigs(overrides, configs);
    }

    return configs;
}
export default getConfig;

26
lib/common.mjs

@ -1,5 +1,7 @@ @@ -1,5 +1,7 @@
//公用方法
import { rm as removeFile } from 'node:fs/promises';
import { readdir, readFile } from 'node:fs/promises';
import path from 'node:path';
export default {
@ -65,4 +67,28 @@ export default { @@ -65,4 +67,28 @@ export default {
return imgType;
},
loadCustomizeConfig: async function(configFileName) {
let configs = {};
try {
let filepath = path.resolve(configFileName);
let content = await readFile(filepath, { encoding: 'utf8' });
if (content) {
configs = JSON.parse(content);
}
}catch(error) {
console.error('Get config from %s failed: %s', configFileName, error);
}
return configs;
},
mergeConfigs: function(myConfig, configs) {
for (const key in myConfig) {
configs[key] = myConfig[key];
}
return configs;
}
};

1
lib/taskMoniter.mjs

@ -9,7 +9,6 @@ @@ -9,7 +9,6 @@
* -------------------
* task数据结构{id:'', url: '', status:''}
*/
import configs from '../config.mjs';
import common from './common.mjs';
import fs from 'node:fs';
import { readdir, readFile } from 'node:fs/promises';

4
spider.mjs

@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
* 爬虫主程序
* 负责监听任务目录里的新任务并自动抓取数据保存到数据目录
*/
import configs from './config.mjs';
import getConfigs from './config.mjs';
import common from './lib/common.mjs';
import TaskMoniter from "./lib/taskMoniter.mjs";
import TaJian from "./lib/tajian.mjs";
@ -16,6 +16,8 @@ import Bilibili from './bot/Bilibili.mjs'; @@ -16,6 +16,8 @@ import Bilibili from './bot/Bilibili.mjs';
import cron from 'node-cron';
(async () => {
//设置configs为全局变量
global.configs = await getConfigs();
const taskMoniter = new TaskMoniter(configs.task_list_dir);
const tajian = new TaJian(configs.data_save_dir);

Loading…
Cancel
Save