Browse Source

add class HeroBot

master
filesite 1 year ago
parent
commit
d56ad7c15c
  1. 67
      bot/HeroBot.mjs
  2. 22
      bot/Kuaishou.mjs
  3. 12
      bot/Xigua.mjs
  4. 57
      test/scrap_test.mjs

67
bot/HeroBot.mjs

@ -0,0 +1,67 @@ @@ -0,0 +1,67 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import fs from 'node:fs';
import path from 'node:path';
/**
 * Base class for site-specific bots that browse through Ulixee Hero.
 *
 * It keeps the Hero connection target and a table of supported sites, and
 * provides `init()` to load (or create and persist) a Hero user profile for
 * one of those sites.
 */
class HeroBot {
  /**
   * @param {string} [heroCloudServer] - Passed to Hero as `connectionToCore`.
   *   When falsy, the option is left unset.
   */
  constructor(heroCloudServer) {
    this.heroServer = heroCloudServer || '';
    // Bot name -> site home page that is visited to build a fresh profile.
    this.supportedBots = {
      douyin: 'https://www.douyin.com',
      kuaishou: 'https://www.kuaishou.com',
      xigua: 'https://www.ixigua.com',
      bilibili: 'https://www.bilibili.com',
    };
    this.name = '';
  }

  /**
   * Returns the user profile object for the given bot.
   *
   * If `profile_<botName>.json` already exists under `../tmp/` (resolved
   * against the current working directory) it is parsed and returned.
   * Otherwise the site's home page is opened with Hero, the resulting user
   * profile is exported, written to that file and returned.
   *
   * @param {string} botName - One of the keys of `this.supportedBots`.
   * @returns {Promise<object|false>} The profile, or `false` when the bot is
   *   not supported or any step fails (the error is logged, not thrown).
   */
  async init(botName) {
    // Own-property check: inherited keys such as "toString" or "constructor"
    // must not be mistaken for supported bots.
    if (!Object.hasOwn(this.supportedBots, botName)) {
      return false;
    }
    const baseUrl = this.supportedBots[botName];
    let hero = null;
    try {
      this.name = botName;
      const profilePath = path.resolve('../tmp/', `profile_${botName}.json`);
      if (fs.existsSync(profilePath)) {
        const json = fs.readFileSync(profilePath, { encoding: 'utf8' });
        return JSON.parse(json);
      }

      const options = {};
      if (this.heroServer) {
        options.connectionToCore = this.heroServer;
      }
      hero = new Hero(options);
      await hero.goto(baseUrl, configs.heroBotOptions);
      // Wait until all page content has finished loading.
      const tab = await hero.activeTab;
      await tab.waitForLoad('AllContentLoaded', { timeoutMs: configs.heroTabOptions.timeoutMs });
      // Persist the profile so later runs can reuse it.
      const latestUserProfile = await hero.exportUserProfile();
      // Make sure the target directory exists before writing.
      fs.mkdirSync(path.dirname(profilePath), { recursive: true });
      fs.writeFileSync(profilePath, JSON.stringify(latestUserProfile, null, 2));
      return latestUserProfile;
    } catch (error) {
      console.error("Error got when request %s via hero: %s", baseUrl, error);
      return false;
    } finally {
      // Always release the Hero session, including when a step above threw.
      if (hero !== null) {
        try {
          await hero.close();
        } catch (closeError) {
          console.error("Error got when closing hero for %s: %s", baseUrl, closeError);
        }
      }
    }
  }
}
export default HeroBot;

22
bot/Kuaishou.mjs

@ -1,12 +1,8 @@ @@ -1,12 +1,8 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import fs from 'node:fs';
import path from 'node:path';
import HeroBot from './HeroBot.mjs';
class Kuaishou {
constructor(heroCloudServer) {
this.heroServer = heroCloudServer ? heroCloudServer : '';
}
class Kuaishou extends HeroBot {
async scrap(url) {
let data = {};
@ -18,12 +14,9 @@ class Kuaishou { @@ -18,12 +14,9 @@ class Kuaishou {
options.connectionToCore = this.heroServer;
}
const profilePath = path.resolve('../tmp/', 'profile_kuaishou.json');
let saveProfile = false;
if (fs.existsSync(profilePath) != false) {
const json = fs.readFileSync(profilePath, { encoding: 'utf8' });
options.userProfile = JSON.parse(json);
saveProfile = true;
const profile = await this.init('kuaishou');
if (profile) {
options.userProfile = profile;
}
const hero = new Hero(options);
@ -33,11 +26,6 @@ class Kuaishou { @@ -33,11 +26,6 @@ class Kuaishou {
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
if (saveProfile) {
const latestUserProfile = await hero.exportUserProfile();
fs.writeFileSync(profilePath, JSON.stringify(latestUserProfile, null, 2));
}
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;

12
bot/Xigua.mjs

@ -1,20 +1,24 @@ @@ -1,20 +1,24 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
class Xigua {
constructor(heroCloudServer) {
this.heroServer = heroCloudServer ? heroCloudServer : '';
}
class Xigua extends HeroBot {
async scrap(url) {
let data = {};
try {
let options = {};
if (this.heroServer) {
options.connectionToCore = this.heroServer;
}
const profile = await this.init('xigua');
if (profile) {
options.userProfile = profile;
}
const hero = new Hero(options);
await hero.goto(url, configs.heroBotOptions);

57
test/scrap_test.mjs

@ -4,15 +4,29 @@ import Xigua from '../bot/Xigua.mjs'; @@ -4,15 +4,29 @@ import Xigua from '../bot/Xigua.mjs';
import configs from '../config.mjs';
(async () => {
let test_bot = 'douyin';
if (process.argv.length == 3) {
test_bot = process.argv[2];
}
console.log('当前测试Bot:%s', test_bot);
const heroCloudServer = 'ws://192.168.3.13:1818';
let url = '', data = {};
switch(test_bot) {
case 'douyin':
//抖音测试
//const douyin = new Douyin(heroCloudServer);
//const url = 'https://v.douyin.com/iJr1NsJJ/';
//console.log('请求中: %s ...', url);
//const data = await douyin.scrap(url);
//console.log("解析结果:\n%s", JSON.stringify(data));
const douyin = new Douyin(heroCloudServer);
url = 'https://v.douyin.com/iJr1NsJJ/';
console.log('请求中: %s ...', url);
data = await douyin.scrap(url);
console.log("解析结果:\n%s", JSON.stringify(data));
break;
case 'kuaishou':
//快手测试
configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时
configs.heroBotOptions.userAgent = configs.userAgents.iphone_wechat;
@ -20,22 +34,31 @@ import configs from '../config.mjs'; @@ -20,22 +34,31 @@ import configs from '../config.mjs';
console.log('Hero配置', configs);
const kuaishou = new Kuaishou(heroCloudServer);
//const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7';
const url = 'https://v.kuaishou.com/7zwqe6';
//const url = 'https://www.kuaishou.com/';
//url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7';
url = 'https://v.kuaishou.com/7zwqe6';
//url = 'https://www.kuaishou.com/';
console.log('请求中: %s ...', url);
const data = await kuaishou.scrap(url);
data = await kuaishou.scrap(url);
console.log("解析结果:\n%s", JSON.stringify(data));
break;
case 'xigua':
//西瓜测试
//const url = 'https://www.ixigua.com/7092326495246516749';
//configs.heroBotOptions.referrer = url;
//configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome;
//console.log('Hero配置', configs);
//url = 'https://www.ixigua.com/7092326495246516749'; //pc
url = 'https://v.ixigua.com/ieUaqrFN/'; //mobile
configs.heroBotOptions.referrer = url;
configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome;
console.log('Hero配置', configs);
const xigua = new Xigua(heroCloudServer);
console.log('请求中: %s ...', url);
data = await xigua.scrap(`${url}?wid_try=1`);
console.log("解析结果:\n%s", JSON.stringify(data));
//const xigua = new Xigua(heroCloudServer);
//console.log('请求中: %s ...', url);
//const data = await xigua.scrap(`${url}?wid_try=1`);
//console.log("解析结果:\n%s", JSON.stringify(data));
break;
}
process.exit(0);
})();

Loading…
Cancel
Save