Browse Source

Call api of HeroUnion done

master
filesite 9 months ago
parent
commit
2a06808f89
  1. 4
      config.mjs
  2. 150
      lib/heroBot.mjs
  3. 25
      lib/taskMoniter.mjs
  4. 3
      package.json
  5. 71
      spider.mjs

4
config.mjs

@ -4,8 +4,9 @@ export default { @@ -4,8 +4,9 @@ export default {
data_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io
//herounion对接配置
//HeroUnion英雄联盟对接配置
herounion: {
server_url: 'http://127.0.0.1:8080', //联盟服务地址
name: 'machete_hero', //爬虫名字
description: '支持Machete的TaJian皮肤的hero爬虫', //爬虫简介
platforms: 'douyin,kuaishou,xigua,bilibili', //爬虫支持的平台
@ -13,6 +14,7 @@ export default { @@ -13,6 +14,7 @@ export default {
country: 'cn', //爬虫所在国家
lang: 'zh', //爬虫支持的语言
contact: 'https://filesite.io', //爬虫的联系方式
data_mode: 'json' //爬虫支持的数据格式
},

150
lib/heroBot.mjs

@ -0,0 +1,150 @@ @@ -0,0 +1,150 @@
/**
* HeroUnion Bot SDK
*/
import test from 'node:test';
import assert from 'node:assert';
import axios from 'axios';
import md5 from 'md5';
/**
 * Client for the HeroUnion HTTP API.
 *
 * Wraps three endpoints derived from the union server URL: heartbeat
 * (`/api/onboard/`), task pickup (`/api/gettask/`) and task result upload
 * (`/api/savetask/`). Network failures are logged with console.error and
 * reported to the caller as `false`; they are never thrown.
 */
class HeroBot {
    /**
     * @param {string} server_url - Base URL of the union server (required).
     * @param {string} bot_name - Bot name (required).
     * @param {string} bot_description - Short bot description (required).
     * @param {string} support_platforms - Platforms the bot supports (required).
     * @param {string} support_contracts - Contracts the bot supports (required).
     * @param {string} [bot_country='cn'] - Country the bot runs in.
     * @param {string} [bot_lang='zh'] - Language the bot supports.
     * @param {string} [bot_contact=''] - Contact information for the bot.
     * @param {string} [data_mode='json'] - Data format the bot supports.
     */
    constructor(
        server_url,
        bot_name,
        bot_description,
        support_platforms,
        support_contracts,
        bot_country = 'cn',
        bot_lang = 'zh',
        bot_contact = '',
        data_mode = 'json'
    ) {
        // Required parameters
        this.union_server = server_url;
        this.name = bot_name;
        this.description = bot_description;
        this.platforms = support_platforms;
        this.contracts = support_contracts;

        // Optional parameters (defaults apply only when the argument is undefined)
        this.country = bot_country;
        this.lang = bot_lang;
        this.contact = bot_contact;
        this.data_mode = data_mode;

        // Union API endpoints. Trailing slashes on the base URL are dropped so
        // that "http://host/" does not yield "http://host//api/...".
        const baseUrl = `${server_url}`.replace(/\/+$/, '');
        this.apis = {
            "heartBeat": `${baseUrl}/api/onboard/`,
            "getNewTask": `${baseUrl}/api/gettask/`,
            "saveTaskData": `${baseUrl}/api/savetask/`,
        };

        // Base axios request options shared by every call; treated as read-only.
        this.axiosConfig = {
            timeout: 8000,  // request timeout in milliseconds
            proxy: false    // do not route requests through a proxy
        };
    }

    /**
     * @returns {number} Current Unix time in whole seconds.
     */
    getTimestampInSeconds() {
        return Math.floor(Date.now() / 1000);
    }

    /**
     * Returns a shallow copy of `obj` whose own enumerable keys are inserted
     * in sorted order (nested objects are not reordered).
     *
     * @param {Object} obj
     * @returns {Object}
     */
    sortDict(obj) {
        return Object.keys(obj).sort().reduce(function(result, key) {
            result[key] = obj[key];
            return result;
        }, {});
    }

    /**
     * Signs request parameters: MD5 of the JSON of the key-sorted params
     * concatenated with the token.
     *
     * @param {Object} params
     * @param {string} token
     * @returns {string} Value returned by md5().
     */
    sign(params, token) {
        return md5( JSON.stringify(this.sortDict(params)) + token );
    }

    /**
     * Sends a heartbeat describing this bot to the union.
     *
     * @param {string} status - Status value reported to the union.
     * @returns {Promise<*|false>} Response body, or false when the request failed.
     */
    async heartBeat(status) {
        const params = {
            name: this.name,
            description: this.description,
            status: status,
            timestamp: this.getTimestampInSeconds(),
            platforms: this.platforms,
            contracts: this.contracts,
            country: this.country,
            lang: this.lang,
            contact: this.contact
        };

        let response = null;
        try {
            response = await axios.post(this.apis.heartBeat, params, this.axiosConfig);
        }catch(err) {
            console.error('[ERROR] HeroBot heart beat failed: %s, api: %s, params: %s',
                err,
                this.apis.heartBeat,
                JSON.stringify(params)
            );
        }

        return response ? response.data : false;
    }

    /**
     * Asks the union for a new task matching this bot's capabilities.
     *
     * @returns {Promise<*|false>} The `task` field of the response when the
     *     response `code` equals 1; otherwise false (including on request
     *     failure or an empty response body).
     */
    async getNewTask() {
        const params = {
            platforms: this.platforms,
            contracts: this.contracts,
            data_mode: this.data_mode,
            country: this.country,
            lang: this.lang
        };

        // Build a fresh request config. Writing method/url/params onto
        // this.axiosConfig itself would leak these query params into every
        // later POST that reuses the shared config.
        const queryOption = {
            ...this.axiosConfig,
            method: 'get',
            url: this.apis.getNewTask,
            params: params
        };

        let response = null;
        try {
            response = await axios(queryOption);
        }catch(err) {
            console.error('[ERROR] HeroBot get new task failed: %s, api: %s, params: %s',
                err,
                this.apis.getNewTask,
                JSON.stringify(params)
            );
        }

        const body = response ? response.data : null;
        // Loose comparison kept on purpose so a string "1" code is still accepted.
        if (!body || body.code != 1) {
            return false;
        }
        return body.task;
    }

    /**
     * Uploads the result of a task to the union.
     *
     * @param {*} task_id - Identifier of the task being reported.
     * @param {string} task_token - Token used to sign the request.
     * @param {*} task_data - Result payload sent as `task_result`.
     * @returns {Promise<*|false>} Response body, or false when the request failed.
     */
    async saveTaskData(task_id, task_token, task_data) {
        const params = {
            name: this.name,
            task_id: task_id,
            task_result: task_data
        };
        // The signature covers the params as they are before `sign` is added.
        params.sign = this.sign(params, task_token);

        let response = null;
        try {
            response = await axios.post(this.apis.saveTaskData, params, this.axiosConfig);
        }catch(err) {
            console.error('[ERROR] HeroBot save task data failed: %s, api: %s, params: %s',
                err,
                this.apis.saveTaskData,
                JSON.stringify(params)
            );
        }

        return response ? response.data : false;
    }
}
export default HeroBot;

25
lib/taskMoniter.mjs

@ -9,11 +9,13 @@ @@ -9,11 +9,13 @@
* -------------------
* task数据结构{id:'', url: '', status:''}
*/
import configs from '../config.mjs';
import common from './common.mjs';
import fs from 'node:fs';
import { readdir, readFile } from 'node:fs/promises';
import path from 'node:path';
import cron from 'node-cron';
import HeroBot from "./heroBot.mjs";
class TaskMoniter {
constructor(task_list_dir) {
@ -36,6 +38,20 @@ class TaskMoniter { @@ -36,6 +38,20 @@ class TaskMoniter {
done: 'done',
failed: 'failed',
};
//HeroUnion英雄联盟对接
let heroUnionConfig = configs.herounion;
this.heroBot = new HeroBot(
heroUnionConfig.server_url,
heroUnionConfig.name,
heroUnionConfig.description,
heroUnionConfig.platforms,
heroUnionConfig.contracts,
heroUnionConfig.country,
heroUnionConfig.lang,
heroUnionConfig.contact,
heroUnionConfig.data_mode
);
}
getTaskFilePath(task_id) {
@ -159,6 +175,15 @@ class TaskMoniter { @@ -159,6 +175,15 @@ class TaskMoniter {
this.addTask(task);
}
//从HeroUnion获取任务
let unionTask = await this.heroBot.getNewTask();
if (unionTask) {
console.log('Got new union task %s, url: %s', unionTask.id, unionTask.url);
unionTask.status = this.statusCode.waiting;
unionTask.from = 'HeroUnion'; //标记此任务来自联盟
this.addTask(unionTask);
}
this.checking = false;
}catch(error) {
this.checking = false;

3
package.json

@ -5,7 +5,8 @@ @@ -5,7 +5,8 @@
"@ulixee/cloud": "^2.0.0-alpha.24",
"@ulixee/hero": "^2.0.0-alpha.24",
"node-cron": "^3.0.2",
"axios": "^1.3.3"
"axios": "^1.3.3",
"md5": "^2.3.0"
},
"scripts": {
"start": "node spider.mjs"

71
spider.mjs

@ -6,6 +6,7 @@ import configs from './config.mjs'; @@ -6,6 +6,7 @@ import configs from './config.mjs';
import common from './lib/common.mjs';
import TaskMoniter from "./lib/taskMoniter.mjs";
import TaJian from "./lib/tajian.mjs";
import HeroBot from "./lib/heroBot.mjs";
import Douyin from './bot/Douyin.mjs';
import Kuaishou from './bot/Kuaishou.mjs';
@ -37,18 +38,18 @@ import cron from 'node-cron'; @@ -37,18 +38,18 @@ import cron from 'node-cron';
console.log('New task %s handle by bot %s.', task.url, botName);
let bot = null;
switch (botName) {
case 'douyin':
bot = new Douyin(heroCloudServer);
break;
case 'kuaishou':
bot = new Kuaishou(heroCloudServer);
break;
case 'xigua':
bot = new Xigua(heroCloudServer);
break;
case 'bilibili':
bot = new Bilibili(heroCloudServer);
break;
case 'douyin':
bot = new Douyin(heroCloudServer);
break;
case 'kuaishou':
bot = new Kuaishou(heroCloudServer);
break;
case 'xigua':
bot = new Xigua(heroCloudServer);
break;
case 'bilibili':
bot = new Bilibili(heroCloudServer);
break;
}
if (bot) {
@ -62,30 +63,52 @@ import cron from 'node-cron'; @@ -62,30 +63,52 @@ import cron from 'node-cron';
if (
await tajian.saveUrlShortcut(task.id, data)
&& await tajian.saveDescriptionFiles(task.id, data)
) {
) {
taskMoniter.setTaskDone(task.id);
}else {
taskMoniter.setTaskFailed(task.id);
}
}else {
taskMoniter.setTaskFailed(task.id);
}
spider_is_running = false;
}else {
console.error('No bot matched with url %s', task.url);
taskMoniter.setTaskRunning(task.id);
taskMoniter.setTaskFailed(task.id);
}
spider_is_running = false;
}else {
console.error('No bot matched with url %s', task.url);
taskMoniter.setTaskRunning(task.id);
taskMoniter.setTaskFailed(task.id);
}
}, {
scheduled: false
});
}, {
scheduled: false
});
task_auto_run.start();
console.log('[%s] Spider started.', common.getTimeString());
//TODO: 对接英雄联盟接口:https://herounion.filesite.io
//HeroUnion英雄联盟对接
let heroUnionConfig = configs.herounion;
let heroBot = new HeroBot(
heroUnionConfig.server_url,
heroUnionConfig.name,
heroUnionConfig.description,
heroUnionConfig.platforms,
heroUnionConfig.contracts,
heroUnionConfig.country,
heroUnionConfig.lang,
heroUnionConfig.contact,
heroUnionConfig.data_mode
);
//爬虫心跳上报
const heartBeatFrequence = 60; //1 分钟上报一次
const heroUnionHeartBeat = cron.schedule(`*/${heartBeatFrequence} * * * * *`, async () => {
let status = spider_is_running ? 'busy' : 'idle';
const res = await heroBot.heartBeat(status);
console.log('HeroUnion bot heart beat result', res);
}, {scheduled: false});
heroUnionHeartBeat.start();
console.log('[%s] HeroUnion bot heart beat started.', common.getTimeString());
})().catch(error => {
console.error("Spider error got:\n%s", error);
process.exit(1);

Loading…
Cancel
Save