diff --git a/README.md b/README.md index 1b4eaa8..c8c6b94 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Hero scripts of machete. ## 使用方法 1. 下载本源码到本地后,进入项目根目录 + ``` git clone "https://git.filesite.io/filesite/machete_hero.git" cd machete_hero/ @@ -47,6 +48,7 @@ cd machete_hero/ 2. 执行下面命令安装依赖包 + ``` npm install ``` @@ -55,17 +57,28 @@ npm install 3. 执行下面命令启动Hero Cloud + +请先安装docker并下载镜像: + ``` -./start_cloud.sh +docker pull ulixee/ulixee-cloud +``` + +再执行cloud启动脚本: + +``` +./start_cloud_in_container.sh ``` 4. 执行下面命令启动爬虫spider.mjs + ``` npm start ``` 带参数启动,设置自定义配置文件,覆盖默认的config.mjs + ``` npm start -- config_custom.json ``` @@ -73,6 +86,7 @@ npm start -- config_custom.json 在目录todo/里创建任务文件,爬虫检测到新任务后自动抓取数据并保存到data/目录下。 手动添加任务命令示例: + ``` echo "https://tajian.tv" > todo/test_01.task ``` diff --git a/config_custom.json b/config_custom.json new file mode 100644 index 0000000..a4430af --- /dev/null +++ b/config_custom.json @@ -0,0 +1,15 @@ +{ + "herounion": { + "server_url": "https://herounion.filesite.io", + "name": "machete_hero", + "description": "支持TaJian.tv的hero爬虫", + "platforms": "douyin,kuaishou,xigua,bilibili,website", + "contracts": "tajiantv", + "country": "cn", + "lang": "zh", + "contact": "https://filesite.io", + "data_mode": "json", + + "notify_max_try": 5 + } +} \ No newline at end of file diff --git a/package.json b/package.json index 58133f1..9d26801 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,6 @@ "node": ">=18" }, "dependencies": { - "@ulixee/cloud": "^2.0.0-alpha.24", "@ulixee/hero": "^2.0.0-alpha.24", "node-cron": "^3.0.2", "axios": "^1.3.3", diff --git a/spider_watcher.sh b/spider_watcher.sh index 224adae..b36e34d 100755 --- a/spider_watcher.sh +++ b/spider_watcher.sh @@ -17,7 +17,7 @@ watcher() if [ $cloud_num -ge 1 ]; then echo "ulixee cloud is alive, start spider" cd $script_root - npm start -- myconfig.json + npm start -- config_custom.json else echo "ulixee cloud is down, try to restart it" docker stop ulixee_cloud diff --git a/start_cloud.sh b/start_cloud.sh old mode 100755 new mode 100644