Browse Source

bot exception catch

master
filesite 7 months ago
parent
commit
870f41509e
  1. 124
      bot/Bilibili.mjs
  2. 118
      bot/Douyin.mjs
  3. 123
      bot/HeroBot.mjs
  4. 80
      bot/Kuaishou.mjs
  5. 104
      bot/Xigua.mjs

124
bot/Bilibili.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; @@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Bilibili extends HeroBot {
async scrap(url) {
let data = {url: url, done: false};
async scrap(url) {
let data = {url: url, done: false};
try {
let options = {
userAgent: configs.userAgent,
viewport: configs.viewport
@ -18,71 +17,78 @@ class Bilibili extends HeroBot { @@ -18,71 +17,78 @@ class Bilibili extends HeroBot {
options.connectionToCore = this.heroServer;
}
const profile = await this.init('bilibili');
if (profile) {
options.userProfile = profile;
try {
const profile = await this.init('bilibili');
if (profile) {
options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Bilibili bot", err);
}
const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForState({
all(assert) {
assert(
hero.document.title,
text => text != '',
);
try {
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForState({
all(assert) {
assert(
hero.document.title,
text => text != '',
);
}
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('property');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
}
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('property');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
}
//get cover image's base64 data
//sample: //i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@100w_100h_1c.png
//替换成://i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@480w_270h_1c.png
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
data.cover = data.cover.replace(/@[\w]+\./ig, '@480w_270h_1c.');
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
//get cover image's base64 data
//sample: //i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@100w_100h_1c.png
//替换成://i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@480w_270h_1c.png
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
data.cover = data.cover.replace(/@[\w]+\./ig, '@480w_270h_1c.');
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
}
await hero.close();
await hero.close();
data.bot = this.name;
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
}
data.bot = this.name;
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
await hero.close();
}
return data;
}
return data;
}
}

118
bot/Douyin.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; @@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Douyin extends HeroBot {
async scrap(url) {
let data = {url: url, done: false};
async scrap(url) {
let data = {url: url, done: false};
try {
let options = {
userAgent: configs.userAgent,
viewport: configs.viewport
@ -18,68 +17,75 @@ class Douyin extends HeroBot { @@ -18,68 +17,75 @@ class Douyin extends HeroBot {
options.connectionToCore = this.heroServer;
}
const profile = await this.init('douyin');
data.bot = this.name;
if (profile) {
options.userProfile = profile;
try {
const profile = await this.init('douyin');
if (profile) {
options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Douyin bot", err);
}
const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForState({
all(assert) {
assert(
hero.document.title,
text => text != '',
);
}
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('video_cover_image_url') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('video_title') > -1) {
data.title = await elem.getAttribute('content');
}
}
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
try {
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForState({
all(assert) {
assert(
hero.document.title,
text => text != '',
);
}
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('video_cover_image_url') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('video_title') > -1) {
data.title = await elem.getAttribute('content');
}
}
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
}
await hero.close();
await hero.close();
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
}
data.bot = this.name;
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
await hero.close();
}
return data;
}
return data;
}
}

123
bot/HeroBot.mjs

@ -5,31 +5,30 @@ import { fileURLToPath } from 'url'; @@ -5,31 +5,30 @@ import { fileURLToPath } from 'url';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
class HeroBot {
constructor(heroCloudServer) {
this.heroServer = heroCloudServer ? heroCloudServer : '';
this.supportedBots = {
douyin: 'https://www.douyin.com',
kuaishou: 'https://www.kuaishou.com',
xigua: 'https://www.ixigua.com',
bilibili: 'https://www.bilibili.com',
};
this.name = '';
constructor(heroCloudServer) {
this.heroServer = heroCloudServer ? heroCloudServer : '';
this.supportedBots = {
douyin: 'https://www.douyin.com',
kuaishou: 'https://www.kuaishou.com',
xigua: 'https://www.ixigua.com',
bilibili: 'https://www.bilibili.com',
};
const __filename = fileURLToPath(import.meta.url);
this.root = path.dirname(__filename);
}
this.name = '';
//返回profile对象
async init(botName) {
if (typeof(this.supportedBots[botName]) == 'undefined') {
return false;
const __filename = fileURLToPath(import.meta.url);
this.root = path.dirname(__filename);
}
const base_url = this.supportedBots[botName];
//返回profile对象
async init(botName) {
if (typeof(this.supportedBots[botName]) == 'undefined') {
return false;
}
const base_url = this.supportedBots[botName];
try {
this.name = botName;
let options = {
@ -48,62 +47,64 @@ class HeroBot { @@ -48,62 +47,64 @@ class HeroBot {
}
//console.log('Hero init配置', configs);
const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(base_url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
try {
hero.use(ClientLogPlugin); //开启log
await hero.goto(base_url, configs.heroBotOptions);
//保存profile
const latestUserProfile = await hero.exportUserProfile();
this.saveProfile(latestUserProfile);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.close();
return latestUserProfile;
}catch(error) {
console.error("Error got when request %s via hero: %s", base_url, error);
}
//保存profile
const latestUserProfile = await hero.exportUserProfile();
this.saveProfile(latestUserProfile);
return false;
}
await hero.close();
//保存profile
saveProfile(profile) {
if (this.name == '') {return false;}
const botName = this.name;
return latestUserProfile;
}catch(error) {
console.error("Error got when bot init with %s via hero, error: %s", base_url, error);
await hero.close();
}
try {
//保存profile
const profilePath = path.resolve(this.root, '../tmp/', `profile_${botName}.json`);
profile = this.fixCookies(profile);
fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2));
}catch(error) {
console.error("Error got when save profile of %s, error detail:\n%s", botName, error);
return false;
}
return true;
}
//保存profile
saveProfile(profile) {
if (this.name == '') {return false;}
//处理name为空的cookie
fixCookies(profile) {
let fixedProfile = profile;
if (typeof(profile.cookies) == 'undefined') {return profile;}
const botName = this.name;
const botName = this.name;
for (const index in profile.cookies) {
if (profile.cookies[index].name == '') {
fixedProfile.cookies[index].name = botName;
try {
//保存profile
const profilePath = path.resolve(this.root, '../tmp/', `profile_${botName}.json`);
profile = this.fixCookies(profile);
fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2));
}catch(error) {
console.error("Error got when save profile of %s, error detail:\n%s", botName, error);
return false;
}
return true;
}
return fixedProfile;
}
//处理name为空的cookie
fixCookies(profile) {
let fixedProfile = profile;
if (typeof(profile.cookies) == 'undefined') {return profile;}
const botName = this.name;
for (const index in profile.cookies) {
if (profile.cookies[index].name == '') {
fixedProfile.cookies[index].name = botName;
}
}
return fixedProfile;
}
}

80
bot/Kuaishou.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; @@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Kuaishou extends HeroBot {
async scrap(url) {
let data = {url: url, done: false};
async scrap(url) {
let data = {url: url, done: false};
try {
let options = {
userAgent: configs.userAgent,
viewport: configs.viewport
@ -18,52 +17,59 @@ class Kuaishou extends HeroBot { @@ -18,52 +17,59 @@ class Kuaishou extends HeroBot {
options.connectionToCore = this.heroServer;
}
const profile = await this.init('kuaishou');
if (profile) {
options.userProfile = profile;
try {
const profile = await this.init('kuaishou');
if (profile) {
options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Kuaishou bot", err);
}
const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForPaintingStable();
try {
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForPaintingStable();
const elem = hero.document.querySelector('.video-container-player');
if (elem) {
data.cover = await elem.getAttribute('poster');
}
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const elem = hero.document.querySelector('.video-container-player');
if (elem) {
data.cover = await elem.getAttribute('poster');
}
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
}
await hero.close();
await hero.close();
data.bot = this.name;
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
};
data.bot = this.name;
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
await hero.close();
};
return data;
}
return data;
}
}

104
bot/Xigua.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; @@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Xigua extends HeroBot {
async scrap(url) {
let data = {url: url, done: false};
async scrap(url) {
let data = {url: url, done: false};
try {
let options = {
userAgent: configs.userAgent,
viewport: configs.viewport
@ -18,61 +17,68 @@ class Xigua extends HeroBot { @@ -18,61 +17,68 @@ class Xigua extends HeroBot {
options.connectionToCore = this.heroServer;
}
const profile = await this.init('xigua');
if (profile) {
options.userProfile = profile;
try {
const profile = await this.init('xigua');
if (profile) {
options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Xigua bot", err);
}
const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForPaintingStable();
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
}
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
try {
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForPaintingStable();
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
}
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
}
await hero.close();
await hero.close();
data.bot = this.name;
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
};
data.bot = this.name;
data.done = true;
}catch(error) {
console.error("Error got when request %s via hero: %s", url, error);
await hero.close();
};
return data;
}
return data;
}
}

Loading…
Cancel
Save