diff --git a/bot/HeroBot.mjs b/bot/HeroBot.mjs
new file mode 100644
index 0000000..0496a1a
--- /dev/null
+++ b/bot/HeroBot.mjs
@@ -0,0 +1,99 @@
+import Hero from '@ulixee/hero';
+import configs from '../config.mjs';
+import fs from 'node:fs';
+import path from 'node:path';
+
+/**
+ * Base class shared by the site-specific bots.
+ *
+ * Keeps the optional Hero core address and obtains a Hero user profile for a
+ * supported site, caching the exported profile as JSON on disk.
+ */
+class HeroBot {
+  /**
+   * @param {string} [heroCloudServer] Value passed to Hero as
+   *   `connectionToCore`; when falsy, that option is not set.
+   */
+  constructor(heroCloudServer) {
+    this.heroServer = heroCloudServer ? heroCloudServer : '';
+
+    // Page visited to build a fresh profile for each supported bot name.
+    this.supportedBots = {
+      douyin: 'https://www.douyin.com',
+      kuaishou: 'https://www.kuaishou.com',
+      xigua: 'https://www.ixigua.com',
+      bilibili: 'https://www.bilibili.com',
+    };
+
+    // Set by init() to the supported bot name it was last called with.
+    this.name = '';
+  }
+
+  /**
+   * Returns the Hero user profile for `botName`.
+   *
+   * A profile already saved on disk is parsed and returned as is. Otherwise
+   * the site's page is opened with Hero, the exported profile is written to
+   * disk and returned.
+   *
+   * @param {string} botName One of the keys of `this.supportedBots`.
+   * @returns {Promise} Resolves to the profile, or to `false` when the name
+   *   is not supported or the profile could not be obtained.
+   */
+  async init(botName) {
+    // Own-property check so inherited keys such as "constructor" are rejected.
+    if (!Object.prototype.hasOwnProperty.call(this.supportedBots, botName)) {
+      return false;
+    }
+
+    const baseUrl = this.supportedBots[botName];
+
+    try {
+      this.name = botName;
+
+      const options = {};
+      if (this.heroServer) {
+        options.connectionToCore = this.heroServer;
+      }
+
+      // Relative path: it is resolved against the current working directory.
+      const profilePath = path.resolve('../tmp/', `profile_${botName}.json`);
+      if (fs.existsSync(profilePath)) {
+        try {
+          return JSON.parse(fs.readFileSync(profilePath, { encoding: 'utf8' }));
+        } catch (error) {
+          // A corrupt cache file must not disable the bot for good: report
+          // it and fall through to build (and overwrite) a fresh profile.
+          console.error('Ignoring unreadable profile %s: %s', profilePath, error);
+        }
+      }
+
+      const hero = new Hero(options);
+      try {
+        await hero.goto(baseUrl, configs.heroBotOptions);
+
+        // Wait until all page content has loaded.
+        const tab = await hero.activeTab;
+        await tab.waitForLoad('AllContentLoaded', {
+          timeoutMs: configs.heroTabOptions.timeoutMs,
+        });
+
+        // Save the profile, creating the target directory on first use.
+        const latestUserProfile = await hero.exportUserProfile();
+        fs.mkdirSync(path.dirname(profilePath), { recursive: true });
+        fs.writeFileSync(profilePath, JSON.stringify(latestUserProfile, null, 2));
+
+        return latestUserProfile;
+      } finally {
+        // Close the Hero session on failure as well as on success.
+        await hero.close();
+      }
+    } catch (error) {
+      console.error("Error got when request %s via hero: %s", baseUrl, error);
+    }
+
+    return false;
+  }
+}
+
+export default HeroBot;
diff --git a/bot/Kuaishou.mjs b/bot/Kuaishou.mjs
index abd035a..20ac7e7 100644
--- a/bot/Kuaishou.mjs
+++ b/bot/Kuaishou.mjs
@@ -1,12 +1,8 @@
 import Hero from '@ulixee/hero';
 import configs from '../config.mjs';
-import fs from 'node:fs';
-import path from 'node:path'; +import HeroBot from './HeroBot.mjs'; -class Kuaishou { - constructor(heroCloudServer) { - this.heroServer = heroCloudServer ? heroCloudServer : ''; - } +class Kuaishou extends HeroBot { async scrap(url) { let data = {}; @@ -18,12 +14,9 @@ class Kuaishou { options.connectionToCore = this.heroServer; } - const profilePath = path.resolve('../tmp/', 'profile_kuaishou.json'); - let saveProfile = false; - if (fs.existsSync(profilePath) != false) { - const json = fs.readFileSync(profilePath, { encoding: 'utf8' }); - options.userProfile = JSON.parse(json); - saveProfile = true; + const profile = await this.init('kuaishou'); + if (profile) { + options.userProfile = profile; } const hero = new Hero(options); @@ -33,11 +26,6 @@ class Kuaishou { const tab = await hero.activeTab; await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); - if (saveProfile) { - const latestUserProfile = await hero.exportUserProfile(); - fs.writeFileSync(profilePath, JSON.stringify(latestUserProfile, null, 2)); - } - //解析网页HTML数据 data.title = await hero.document.title; //data.url = await hero.url; diff --git a/bot/Xigua.mjs b/bot/Xigua.mjs index 8f08e4e..5ec1522 100644 --- a/bot/Xigua.mjs +++ b/bot/Xigua.mjs @@ -1,20 +1,24 @@ import Hero from '@ulixee/hero'; import configs from '../config.mjs'; +import HeroBot from './HeroBot.mjs'; -class Xigua { - constructor(heroCloudServer) { - this.heroServer = heroCloudServer ? 
heroCloudServer : ''; - } +class Xigua extends HeroBot { async scrap(url) { let data = {}; try { let options = {}; + if (this.heroServer) { options.connectionToCore = this.heroServer; } + const profile = await this.init('xigua'); + if (profile) { + options.userProfile = profile; + } + const hero = new Hero(options); await hero.goto(url, configs.heroBotOptions); diff --git a/test/scrap_test.mjs b/test/scrap_test.mjs index 522fc5c..00e13df 100644 --- a/test/scrap_test.mjs +++ b/test/scrap_test.mjs @@ -4,38 +4,61 @@ import Xigua from '../bot/Xigua.mjs'; import configs from '../config.mjs'; (async () => { + let test_bot = 'douyin'; + if (process.argv.length == 3) { + test_bot = process.argv[2]; + } + console.log('当前测试Bot:%s', test_bot); + const heroCloudServer = 'ws://192.168.3.13:1818'; + let url = '', data = {}; + + switch(test_bot) { + + case 'douyin': + //抖音测试 + const douyin = new Douyin(heroCloudServer); + url = 'https://v.douyin.com/iJr1NsJJ/'; + console.log('请求中: %s ...', url); + data = await douyin.scrap(url); + console.log("解析结果:\n%s", JSON.stringify(data)); + + break; + + + case 'kuaishou': + //快手测试 + configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时 + configs.heroBotOptions.userAgent = configs.userAgents.iphone_wechat; + //configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome; + console.log('Hero配置', configs); + + const kuaishou = new Kuaishou(heroCloudServer); + //url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7'; + url = 'https://v.kuaishou.com/7zwqe6'; + //url = 'https://www.kuaishou.com/'; + console.log('请求中: %s ...', url); + data = await kuaishou.scrap(url); + console.log("解析结果:\n%s", JSON.stringify(data)); + + break; + + + case 'xigua': + //西瓜测试 + //url = 'https://www.ixigua.com/7092326495246516749'; //pc + url = 'https://v.ixigua.com/ieUaqrFN/'; //mobile + configs.heroBotOptions.referrer = url; + configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome; + console.log('Hero配置', configs); + + const xigua = new 
Xigua(heroCloudServer); + console.log('请求中: %s ...', url); + data = await xigua.scrap(`${url}?wid_try=1`); + console.log("解析结果:\n%s", JSON.stringify(data)); - //抖音测试 - //const douyin = new Douyin(heroCloudServer); - //const url = 'https://v.douyin.com/iJr1NsJJ/'; - //console.log('请求中: %s ...', url); - //const data = await douyin.scrap(url); - //console.log("解析结果:\n%s", JSON.stringify(data)); - - //快手测试 - configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时 - configs.heroBotOptions.userAgent = configs.userAgents.iphone_wechat; - //configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome; - console.log('Hero配置', configs); - - const kuaishou = new Kuaishou(heroCloudServer); - //const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7'; - const url = 'https://v.kuaishou.com/7zwqe6'; - //const url = 'https://www.kuaishou.com/'; - console.log('请求中: %s ...', url); - const data = await kuaishou.scrap(url); - console.log("解析结果:\n%s", JSON.stringify(data)); - - //西瓜测试 - //const url = 'https://www.ixigua.com/7092326495246516749'; - //configs.heroBotOptions.referrer = url; - //configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome; - //console.log('Hero配置', configs); - - //const xigua = new Xigua(heroCloudServer); - //console.log('请求中: %s ...', url); - //const data = await xigua.scrap(`${url}?wid_try=1`); - //console.log("解析结果:\n%s", JSON.stringify(data)); + break; + } + process.exit(0); })();