Browse Source

bot exception catch

master
filesite 7 months ago
parent
commit
870f41509e
  1. 124
      bot/Bilibili.mjs
  2. 118
      bot/Douyin.mjs
  3. 123
      bot/HeroBot.mjs
  4. 80
      bot/Kuaishou.mjs
  5. 104
      bot/Xigua.mjs

124
bot/Bilibili.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Bilibili extends HeroBot { class Bilibili extends HeroBot {
async scrap(url) { async scrap(url) {
let data = {url: url, done: false}; let data = {url: url, done: false};
try {
let options = { let options = {
userAgent: configs.userAgent, userAgent: configs.userAgent,
viewport: configs.viewport viewport: configs.viewport
@ -18,71 +17,78 @@ class Bilibili extends HeroBot {
options.connectionToCore = this.heroServer; options.connectionToCore = this.heroServer;
} }
const profile = await this.init('bilibili'); try {
if (profile) { const profile = await this.init('bilibili');
options.userProfile = profile; if (profile) {
options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Bilibili bot", err);
} }
const hero = new Hero(options); const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions); try {
hero.use(ClientLogPlugin); //开启log
//等待所有内容加载完成 await hero.goto(url, configs.heroBotOptions);
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); //等待所有内容加载完成
await hero.waitForState({ const tab = await hero.activeTab;
all(assert) { await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
assert( await hero.waitForState({
hero.document.title, all(assert) {
text => text != '', assert(
); hero.document.title,
text => text != '',
);
}
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('property');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
} }
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('property');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
}
//get cover image's base64 data //get cover image's base64 data
//sample: //i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@100w_100h_1c.png //sample: //i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@100w_100h_1c.png
//替换成://i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@480w_270h_1c.png //替换成://i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@480w_270h_1c.png
if (typeof(data.cover) != 'undefined' && data.cover) { if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover); data.cover = common.getAbsoluteUrl(data.cover);
data.cover = data.cover.replace(/@[\w]+\./ig, '@480w_270h_1c.'); data.cover = data.cover.replace(/@[\w]+\./ig, '@480w_270h_1c.');
const response = await hero.goto(data.cover); const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer; const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64')); //console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) { if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64'); data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover); data.cover_type = common.getImageType(data.cover);
}
} }
}
await hero.close(); await hero.close();
data.bot = this.name; data.bot = this.name;
data.done = true; data.done = true;
}catch(error) { }catch(error) {
console.error("Error got when request %s via hero: %s", url, error); console.error("Error got when request %s via hero: %s", url, error);
} await hero.close();
}
return data; return data;
} }
} }

118
bot/Douyin.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Douyin extends HeroBot { class Douyin extends HeroBot {
async scrap(url) { async scrap(url) {
let data = {url: url, done: false}; let data = {url: url, done: false};
try {
let options = { let options = {
userAgent: configs.userAgent, userAgent: configs.userAgent,
viewport: configs.viewport viewport: configs.viewport
@ -18,68 +17,75 @@ class Douyin extends HeroBot {
options.connectionToCore = this.heroServer; options.connectionToCore = this.heroServer;
} }
const profile = await this.init('douyin'); try {
data.bot = this.name; const profile = await this.init('douyin');
if (profile) { if (profile) {
options.userProfile = profile; options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Douyin bot", err);
} }
const hero = new Hero(options); const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForState({
all(assert) {
assert(
hero.document.title,
text => text != '',
);
}
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('video_cover_image_url') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('video_title') > -1) {
data.title = await elem.getAttribute('content');
}
}
//get cover image's base64 data try {
if (typeof(data.cover) != 'undefined' && data.cover) { hero.use(ClientLogPlugin); //开启log
data.cover = common.getAbsoluteUrl(data.cover); await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForState({
all(assert) {
assert(
hero.document.title,
text => text != '',
);
}
});
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('video_cover_image_url') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('video_title') > -1) {
data.title = await elem.getAttribute('content');
}
}
const response = await hero.goto(data.cover); //get cover image's base64 data
const imgBuffer = await response.buffer; if (typeof(data.cover) != 'undefined' && data.cover) {
//console.log('Cover image fetch done', imgBuffer.toString('base64')); data.cover = common.getAbsoluteUrl(data.cover);
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64'); const response = await hero.goto(data.cover);
data.cover_type = common.getImageType(data.cover); const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
} }
}
await hero.close(); await hero.close();
data.done = true; data.bot = this.name;
}catch(error) { data.done = true;
console.error("Error got when request %s via hero: %s", url, error); }catch(error) {
} console.error("Error got when request %s via hero: %s", url, error);
await hero.close();
}
return data; return data;
} }
} }

123
bot/HeroBot.mjs

@ -5,31 +5,30 @@ import { fileURLToPath } from 'url';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
class HeroBot { class HeroBot {
constructor(heroCloudServer) { constructor(heroCloudServer) {
this.heroServer = heroCloudServer ? heroCloudServer : ''; this.heroServer = heroCloudServer ? heroCloudServer : '';
this.supportedBots = { this.supportedBots = {
douyin: 'https://www.douyin.com', douyin: 'https://www.douyin.com',
kuaishou: 'https://www.kuaishou.com', kuaishou: 'https://www.kuaishou.com',
xigua: 'https://www.ixigua.com', xigua: 'https://www.ixigua.com',
bilibili: 'https://www.bilibili.com', bilibili: 'https://www.bilibili.com',
}; };
this.name = '';
const __filename = fileURLToPath(import.meta.url); this.name = '';
this.root = path.dirname(__filename);
}
//返回profile对象 const __filename = fileURLToPath(import.meta.url);
async init(botName) { this.root = path.dirname(__filename);
if (typeof(this.supportedBots[botName]) == 'undefined') {
return false;
} }
const base_url = this.supportedBots[botName]; //返回profile对象
async init(botName) {
if (typeof(this.supportedBots[botName]) == 'undefined') {
return false;
}
const base_url = this.supportedBots[botName];
try {
this.name = botName; this.name = botName;
let options = { let options = {
@ -48,62 +47,64 @@ class HeroBot {
} }
//console.log('Hero init配置', configs); //console.log('Hero init配置', configs);
const hero = new Hero(options); const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(base_url, configs.heroBotOptions);
//等待所有内容加载完成 try {
const tab = await hero.activeTab; hero.use(ClientLogPlugin); //开启log
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); await hero.goto(base_url, configs.heroBotOptions);
//保存profile //等待所有内容加载完成
const latestUserProfile = await hero.exportUserProfile(); const tab = await hero.activeTab;
this.saveProfile(latestUserProfile); await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.close(); //保存profile
const latestUserProfile = await hero.exportUserProfile();
return latestUserProfile; this.saveProfile(latestUserProfile);
}catch(error) {
console.error("Error got when request %s via hero: %s", base_url, error);
}
return false; await hero.close();
}
//保存profile return latestUserProfile;
saveProfile(profile) { }catch(error) {
if (this.name == '') {return false;} console.error("Error got when bot init with %s via hero, error: %s", base_url, error);
await hero.close();
const botName = this.name; }
try {
//保存profile
const profilePath = path.resolve(this.root, '../tmp/', `profile_${botName}.json`);
profile = this.fixCookies(profile);
fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2));
}catch(error) {
console.error("Error got when save profile of %s, error detail:\n%s", botName, error);
return false; return false;
} }
return true; //保存profile
} saveProfile(profile) {
if (this.name == '') {return false;}
//处理name为空的cookie const botName = this.name;
fixCookies(profile) {
let fixedProfile = profile;
if (typeof(profile.cookies) == 'undefined') {return profile;}
const botName = this.name; try {
for (const index in profile.cookies) { //保存profile
if (profile.cookies[index].name == '') { const profilePath = path.resolve(this.root, '../tmp/', `profile_${botName}.json`);
fixedProfile.cookies[index].name = botName; profile = this.fixCookies(profile);
fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2));
}catch(error) {
console.error("Error got when save profile of %s, error detail:\n%s", botName, error);
return false;
} }
return true;
} }
return fixedProfile; //处理name为空的cookie
} fixCookies(profile) {
let fixedProfile = profile;
if (typeof(profile.cookies) == 'undefined') {return profile;}
const botName = this.name;
for (const index in profile.cookies) {
if (profile.cookies[index].name == '') {
fixedProfile.cookies[index].name = botName;
}
}
return fixedProfile;
}
} }

80
bot/Kuaishou.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Kuaishou extends HeroBot { class Kuaishou extends HeroBot {
async scrap(url) { async scrap(url) {
let data = {url: url, done: false}; let data = {url: url, done: false};
try {
let options = { let options = {
userAgent: configs.userAgent, userAgent: configs.userAgent,
viewport: configs.viewport viewport: configs.viewport
@ -18,52 +17,59 @@ class Kuaishou extends HeroBot {
options.connectionToCore = this.heroServer; options.connectionToCore = this.heroServer;
} }
const profile = await this.init('kuaishou'); try {
if (profile) { const profile = await this.init('kuaishou');
options.userProfile = profile; if (profile) {
options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Kuaishou bot", err);
} }
const hero = new Hero(options); const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成 try {
const tab = await hero.activeTab; hero.use(ClientLogPlugin); //开启log
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); await hero.goto(url, configs.heroBotOptions);
await hero.waitForPaintingStable();
//解析网页HTML数据 //等待所有内容加载完成
data.title = await hero.document.title; const tab = await hero.activeTab;
//data.url = await hero.url; await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForPaintingStable();
const elem = hero.document.querySelector('.video-container-player'); //解析网页HTML数据
if (elem) { data.title = await hero.document.title;
data.cover = await elem.getAttribute('poster'); //data.url = await hero.url;
}
//get cover image's base64 data const elem = hero.document.querySelector('.video-container-player');
if (typeof(data.cover) != 'undefined' && data.cover) { if (elem) {
data.cover = common.getAbsoluteUrl(data.cover); data.cover = await elem.getAttribute('poster');
}
const response = await hero.goto(data.cover); //get cover image's base64 data
const imgBuffer = await response.buffer; if (typeof(data.cover) != 'undefined' && data.cover) {
//console.log('Cover image fetch done', imgBuffer.toString('base64')); data.cover = common.getAbsoluteUrl(data.cover);
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64'); const response = await hero.goto(data.cover);
data.cover_type = common.getImageType(data.cover); const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
} }
}
await hero.close(); await hero.close();
data.bot = this.name; data.bot = this.name;
data.done = true; data.done = true;
}catch(error) { }catch(error) {
console.error("Error got when request %s via hero: %s", url, error); console.error("Error got when request %s via hero: %s", url, error);
}; await hero.close();
};
return data; return data;
} }
} }

104
bot/Xigua.mjs

@ -5,10 +5,9 @@ import common from '../lib/common.mjs';
class Xigua extends HeroBot { class Xigua extends HeroBot {
async scrap(url) { async scrap(url) {
let data = {url: url, done: false}; let data = {url: url, done: false};
try {
let options = { let options = {
userAgent: configs.userAgent, userAgent: configs.userAgent,
viewport: configs.viewport viewport: configs.viewport
@ -18,61 +17,68 @@ class Xigua extends HeroBot {
options.connectionToCore = this.heroServer; options.connectionToCore = this.heroServer;
} }
const profile = await this.init('xigua'); try {
if (profile) { const profile = await this.init('xigua');
options.userProfile = profile; if (profile) {
options.userProfile = profile;
}
}catch(err) {
console.error("Error got when init Xigua bot", err);
} }
const hero = new Hero(options); const hero = new Hero(options);
hero.use(ClientLogPlugin); //开启log
await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForPaintingStable();
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
}
//get cover image's base64 data try {
if (typeof(data.cover) != 'undefined' && data.cover) { hero.use(ClientLogPlugin); //开启log
data.cover = common.getAbsoluteUrl(data.cover); await hero.goto(url, configs.heroBotOptions);
//等待所有内容加载完成
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
await hero.waitForPaintingStable();
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elems = await hero.document.querySelectorAll('meta');
let meta_name = '';
for (const elem of elems) {
meta_name = await elem.getAttribute('name');
if (!meta_name) {continue;}
meta_name = meta_name.toLowerCase();
//console.log('meta', meta_name);
if (meta_name.indexOf('og:image') > -1) {
data.cover = await elem.getAttribute('content');
}else if (meta_name.indexOf('og:title') > -1) {
data.title = await elem.getAttribute('content');
}
}
const response = await hero.goto(data.cover); //get cover image's base64 data
const imgBuffer = await response.buffer; if (typeof(data.cover) != 'undefined' && data.cover) {
//console.log('Cover image fetch done', imgBuffer.toString('base64')); data.cover = common.getAbsoluteUrl(data.cover);
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64'); const response = await hero.goto(data.cover);
data.cover_type = common.getImageType(data.cover); const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
} }
}
await hero.close(); await hero.close();
data.bot = this.name; data.bot = this.name;
data.done = true; data.done = true;
}catch(error) { }catch(error) {
console.error("Error got when request %s via hero: %s", url, error); console.error("Error got when request %s via hero: %s", url, error);
}; await hero.close();
};
return data; return data;
} }
} }

Loading…
Cancel
Save