diff --git a/bot/Bilibili.mjs b/bot/Bilibili.mjs index 5af59e8..daa80ab 100644 --- a/bot/Bilibili.mjs +++ b/bot/Bilibili.mjs @@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; class Bilibili extends HeroBot { - async scrap(url) { - let data = {url: url, done: false}; + async scrap(url) { + let data = {url: url, done: false}; - try { let options = { userAgent: configs.userAgent, viewport: configs.viewport @@ -18,71 +17,78 @@ class Bilibili extends HeroBot { options.connectionToCore = this.heroServer; } - const profile = await this.init('bilibili'); - if (profile) { - options.userProfile = profile; + try { + const profile = await this.init('bilibili'); + if (profile) { + options.userProfile = profile; + } + }catch(err) { + console.error("Error got when init Bilibili bot", err); } const hero = new Hero(options); - hero.use(ClientLogPlugin); //开启log - await hero.goto(url, configs.heroBotOptions); - - //等待所有内容加载完成 - const tab = await hero.activeTab; - await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); - await hero.waitForState({ - all(assert) { - assert( - hero.document.title, - text => text != '', - ); + + try { + hero.use(ClientLogPlugin); //开启log + await hero.goto(url, configs.heroBotOptions); + + //等待所有内容加载完成 + const tab = await hero.activeTab; + await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + await hero.waitForState({ + all(assert) { + assert( + hero.document.title, + text => text != '', + ); + } + }); + + //解析网页HTML数据 + data.title = await hero.document.title; + //data.url = await hero.url; + + const elems = await hero.document.querySelectorAll('meta'); + let meta_name = ''; + for (const elem of elems) { + meta_name = await elem.getAttribute('property'); + if (!meta_name) {continue;} + meta_name = meta_name.toLowerCase(); + //console.log('meta', meta_name); + if (meta_name.indexOf('og:image') > -1) { + data.cover = await elem.getAttribute('content'); + }else if (meta_name.indexOf('og:title') > -1) { + data.title = await elem.getAttribute('content'); + } } - }); - - //解析网页HTML数据 - data.title = await hero.document.title; - //data.url = await hero.url; - - const elems = await hero.document.querySelectorAll('meta'); - let meta_name = ''; - for (const elem of elems) { - meta_name = await elem.getAttribute('property'); - if (!meta_name) {continue;} - meta_name = meta_name.toLowerCase(); - //console.log('meta', meta_name); - if (meta_name.indexOf('og:image') > -1) { - data.cover = await elem.getAttribute('content'); - }else if (meta_name.indexOf('og:title') > -1) { - data.title = await elem.getAttribute('content'); - } - } - //get cover image's base64 data - //sample: //i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@100w_100h_1c.png - //替换成://i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@480w_270h_1c.png - if (typeof(data.cover) != 'undefined' && data.cover) { - data.cover = common.getAbsoluteUrl(data.cover); - data.cover = data.cover.replace(/@[\w]+\./ig, '@480w_270h_1c.'); - - const response = await hero.goto(data.cover); - const imgBuffer = await response.buffer; - //console.log('Cover image fetch done', imgBuffer.toString('base64')); - if (imgBuffer) { - data.cover_base64 = imgBuffer.toString('base64'); - data.cover_type = common.getImageType(data.cover); + //get cover image's base64 data + //sample: //i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@100w_100h_1c.png + //替换成://i1.hdslb.com/bfs/archive/ef6204c8788134064dc6b7e8cb20870f1341e604.jpg@480w_270h_1c.png + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + data.cover = data.cover.replace(/@[\w]+\./ig, '@480w_270h_1c.'); + + const response = await hero.goto(data.cover); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } } - } - await hero.close(); + await hero.close(); - data.bot = this.name; - data.done = true; - }catch(error) { - console.error("Error got when request %s via hero: %s", url, error); - } + data.bot = this.name; + data.done = true; + }catch(error) { + console.error("Error got when request %s via hero: %s", url, error); + await hero.close(); + } - return data; - } + return data; + } } diff --git a/bot/Douyin.mjs b/bot/Douyin.mjs index 8a5e548..74c2b51 100644 --- a/bot/Douyin.mjs +++ b/bot/Douyin.mjs @@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; class Douyin extends HeroBot { - async scrap(url) { - let data = {url: url, done: false}; + async scrap(url) { + let data = {url: url, done: false}; - try { let options = { userAgent: configs.userAgent, viewport: configs.viewport @@ -18,68 +17,75 @@ class Douyin extends HeroBot { options.connectionToCore = this.heroServer; } - const profile = await this.init('douyin'); - data.bot = this.name; - if (profile) { - options.userProfile = profile; + try { + const profile = await this.init('douyin'); + if (profile) { + options.userProfile = profile; + } + }catch(err) { + console.error("Error got when init Douyin bot", err); } const hero = new Hero(options); - hero.use(ClientLogPlugin); //开启log - await hero.goto(url, configs.heroBotOptions); - - //等待所有内容加载完成 - const tab = await hero.activeTab; - await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); - await hero.waitForState({ - all(assert) { - assert( - hero.document.title, - text => text != '', - ); - } - }); - - //解析网页HTML数据 - data.title = await hero.document.title; - //data.url = await hero.url; - - const elems = await hero.document.querySelectorAll('meta'); - let meta_name = ''; - for (const elem of elems) { - meta_name = await elem.getAttribute('name'); - if (!meta_name) {continue;} - meta_name = meta_name.toLowerCase(); - //console.log('meta', meta_name); - if (meta_name.indexOf('video_cover_image_url') > -1) { - data.cover = await elem.getAttribute('content'); - }else if (meta_name.indexOf('video_title') > -1) { - data.title = await elem.getAttribute('content'); - } - } - //get cover image's base64 data - if (typeof(data.cover) != 'undefined' && data.cover) { - data.cover = common.getAbsoluteUrl(data.cover); + try { + hero.use(ClientLogPlugin); //开启log + await hero.goto(url, configs.heroBotOptions); + + //等待所有内容加载完成 + const tab = await hero.activeTab; + await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + await hero.waitForState({ + all(assert) { + assert( + hero.document.title, + text => text != '', + ); + } + }); + + //解析网页HTML数据 + data.title = await hero.document.title; + //data.url = await hero.url; + + const elems = await hero.document.querySelectorAll('meta'); + let meta_name = ''; + for (const elem of elems) { + meta_name = await elem.getAttribute('name'); + if (!meta_name) {continue;} + meta_name = meta_name.toLowerCase(); + //console.log('meta', meta_name); + if (meta_name.indexOf('video_cover_image_url') > -1) { + data.cover = await elem.getAttribute('content'); + }else if (meta_name.indexOf('video_title') > -1) { + data.title = await elem.getAttribute('content'); + } + } - const response = await hero.goto(data.cover); - const imgBuffer = await response.buffer; - //console.log('Cover image fetch done', imgBuffer.toString('base64')); - if (imgBuffer) { - data.cover_base64 = imgBuffer.toString('base64'); - data.cover_type = common.getImageType(data.cover); + //get cover image's base64 data + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + + const response = await hero.goto(data.cover); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } } - } - await hero.close(); + await hero.close(); - data.done = true; - }catch(error) { - console.error("Error got when request %s via hero: %s", url, error); - } + data.bot = this.name; + data.done = true; + }catch(error) { + console.error("Error got when request %s via hero: %s", url, error); + await hero.close(); + } - return data; - } + return data; + } } diff --git a/bot/HeroBot.mjs b/bot/HeroBot.mjs index b1f0093..14e6366 100644 --- a/bot/HeroBot.mjs +++ b/bot/HeroBot.mjs @@ -5,31 +5,30 @@ import { fileURLToPath } from 'url'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; class HeroBot { - constructor(heroCloudServer) { - this.heroServer = heroCloudServer ? heroCloudServer : ''; - - this.supportedBots = { - douyin: 'https://www.douyin.com', - kuaishou: 'https://www.kuaishou.com', - xigua: 'https://www.ixigua.com', - bilibili: 'https://www.bilibili.com', - }; - - this.name = ''; + constructor(heroCloudServer) { + this.heroServer = heroCloudServer ? heroCloudServer : ''; + + this.supportedBots = { + douyin: 'https://www.douyin.com', + kuaishou: 'https://www.kuaishou.com', + xigua: 'https://www.ixigua.com', + bilibili: 'https://www.bilibili.com', + }; - const __filename = fileURLToPath(import.meta.url); - this.root = path.dirname(__filename); - } + this.name = ''; - //返回profile对象 - async init(botName) { - if (typeof(this.supportedBots[botName]) == 'undefined') { - return false; + const __filename = fileURLToPath(import.meta.url); + this.root = path.dirname(__filename); } - const base_url = this.supportedBots[botName]; + //返回profile对象 + async init(botName) { + if (typeof(this.supportedBots[botName]) == 'undefined') { + return false; + } + + const base_url = this.supportedBots[botName]; - try { this.name = botName; let options = { @@ -48,62 +47,64 @@ class HeroBot { } //console.log('Hero init配置', configs); - const hero = new Hero(options); - hero.use(ClientLogPlugin); //开启log - await hero.goto(base_url, configs.heroBotOptions); - //等待所有内容加载完成 - const tab = await hero.activeTab; - await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + try { + hero.use(ClientLogPlugin); //开启log + await hero.goto(base_url, configs.heroBotOptions); - //保存profile - const latestUserProfile = await hero.exportUserProfile(); - this.saveProfile(latestUserProfile); + //等待所有内容加载完成 + const tab = await hero.activeTab; + await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); - await hero.close(); - - return latestUserProfile; - }catch(error) { - console.error("Error got when request %s via hero: %s", base_url, error); - } + //保存profile + const latestUserProfile = await hero.exportUserProfile(); + this.saveProfile(latestUserProfile); - return false; - } + await hero.close(); - //保存profile - saveProfile(profile) { - if (this.name == '') {return false;} - - const botName = this.name; + return latestUserProfile; + }catch(error) { + console.error("Error got when bot init with %s via hero, error: %s", base_url, error); + await hero.close(); + } - try { - //保存profile - const profilePath = path.resolve(this.root, '../tmp/', `profile_${botName}.json`); - profile = this.fixCookies(profile); - fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2)); - }catch(error) { - console.error("Error got when save profile of %s, error detail:\n%s", botName, error); return false; } - return true; - } + //保存profile + saveProfile(profile) { + if (this.name == '') {return false;} - //处理name为空的cookie - fixCookies(profile) { - let fixedProfile = profile; - if (typeof(profile.cookies) == 'undefined') {return profile;} + const botName = this.name; - const botName = this.name; - for (const index in profile.cookies) { - if (profile.cookies[index].name == '') { - fixedProfile.cookies[index].name = botName; + try { + //保存profile + const profilePath = path.resolve(this.root, '../tmp/', `profile_${botName}.json`); + profile = this.fixCookies(profile); + fs.writeFileSync(profilePath, JSON.stringify(profile, null, 2)); + }catch(error) { + console.error("Error got when save profile of %s, error detail:\n%s", botName, error); + return false; } + + return true; } - return fixedProfile; - } + //处理name为空的cookie + fixCookies(profile) { + let fixedProfile = profile; + if (typeof(profile.cookies) == 'undefined') {return profile;} + + const botName = this.name; + for (const index in profile.cookies) { + if (profile.cookies[index].name == '') { + fixedProfile.cookies[index].name = botName; + } + } + + return fixedProfile; + } } diff --git a/bot/Kuaishou.mjs b/bot/Kuaishou.mjs index ebb440f..945495e 100644 --- a/bot/Kuaishou.mjs +++ b/bot/Kuaishou.mjs @@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; class Kuaishou extends HeroBot { - async scrap(url) { - let data = {url: url, done: false}; + async scrap(url) { + let data = {url: url, done: false}; - try { let options = { userAgent: configs.userAgent, viewport: configs.viewport @@ -18,52 +17,59 @@ class Kuaishou extends HeroBot { options.connectionToCore = this.heroServer; } - const profile = await this.init('kuaishou'); - if (profile) { - options.userProfile = profile; + try { + const profile = await this.init('kuaishou'); + if (profile) { + options.userProfile = profile; + } + }catch(err) { + console.error("Error got when init Kuaishou bot", err); } const hero = new Hero(options); - hero.use(ClientLogPlugin); //开启log - await hero.goto(url, configs.heroBotOptions); - - //等待所有内容加载完成 - const tab = await hero.activeTab; - await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); - await hero.waitForPaintingStable(); - - //解析网页HTML数据 - data.title = await hero.document.title; - //data.url = await hero.url; - - const elem = hero.document.querySelector('.video-container-player'); - if (elem) { - data.cover = await elem.getAttribute('poster'); - } - - //get cover image's base64 data - if (typeof(data.cover) != 'undefined' && data.cover) { - data.cover = common.getAbsoluteUrl(data.cover); + + try { + hero.use(ClientLogPlugin); //开启log + await hero.goto(url, configs.heroBotOptions); + + //等待所有内容加载完成 + const tab = await hero.activeTab; + await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + await hero.waitForPaintingStable(); + + //解析网页HTML数据 + data.title = await hero.document.title; + //data.url = await hero.url; + + const elem = hero.document.querySelector('.video-container-player'); + if (elem) { + data.cover = await elem.getAttribute('poster'); + } - const response = await hero.goto(data.cover); - const imgBuffer = await response.buffer; - //console.log('Cover image fetch done', imgBuffer.toString('base64')); - if (imgBuffer) { - data.cover_base64 = imgBuffer.toString('base64'); - data.cover_type = common.getImageType(data.cover); + //get cover image's base64 data + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + + const response = await hero.goto(data.cover); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } } - } - await hero.close(); + await hero.close(); - data.bot = this.name; - data.done = true; - }catch(error) { - console.error("Error got when request %s via hero: %s", url, error); - }; + data.bot = this.name; + data.done = true; + }catch(error) { + console.error("Error got when request %s via hero: %s", url, error); + await hero.close(); + }; - return data; - } + return data; + } } diff --git a/bot/Xigua.mjs b/bot/Xigua.mjs index 264b1ff..394c22c 100644 --- a/bot/Xigua.mjs +++ b/bot/Xigua.mjs @@ -5,10 +5,9 @@ import common from '../lib/common.mjs'; class Xigua extends HeroBot { - async scrap(url) { - let data = {url: url, done: false}; + async scrap(url) { + let data = {url: url, done: false}; - try { let options = { userAgent: configs.userAgent, viewport: configs.viewport @@ -18,61 +17,68 @@ class Xigua extends HeroBot { options.connectionToCore = this.heroServer; } - const profile = await this.init('xigua'); - if (profile) { - options.userProfile = profile; + try { + const profile = await this.init('xigua'); + if (profile) { + options.userProfile = profile; + } + }catch(err) { + console.error("Error got when init Xigua bot", err); } const hero = new Hero(options); - hero.use(ClientLogPlugin); //开启log - await hero.goto(url, configs.heroBotOptions); - - //等待所有内容加载完成 - const tab = await hero.activeTab; - await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); - await hero.waitForPaintingStable(); - - //解析网页HTML数据 - data.title = await hero.document.title; - //data.url = await hero.url; - - const elems = await hero.document.querySelectorAll('meta'); - let meta_name = ''; - for (const elem of elems) { - meta_name = await elem.getAttribute('name'); - if (!meta_name) {continue;} - meta_name = meta_name.toLowerCase(); - //console.log('meta', meta_name); - if (meta_name.indexOf('og:image') > -1) { - data.cover = await elem.getAttribute('content'); - }else if (meta_name.indexOf('og:title') > -1) { - data.title = await elem.getAttribute('content'); - } - } - //get cover image's base64 data - if (typeof(data.cover) != 'undefined' && data.cover) { - data.cover = common.getAbsoluteUrl(data.cover); + try { + hero.use(ClientLogPlugin); //开启log + await hero.goto(url, configs.heroBotOptions); + + //等待所有内容加载完成 + const tab = await hero.activeTab; + await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + await hero.waitForPaintingStable(); + + //解析网页HTML数据 + data.title = await hero.document.title; + //data.url = await hero.url; + + const elems = await hero.document.querySelectorAll('meta'); + let meta_name = ''; + for (const elem of elems) { + meta_name = await elem.getAttribute('name'); + if (!meta_name) {continue;} + meta_name = meta_name.toLowerCase(); + //console.log('meta', meta_name); + if (meta_name.indexOf('og:image') > -1) { + data.cover = await elem.getAttribute('content'); + }else if (meta_name.indexOf('og:title') > -1) { + data.title = await elem.getAttribute('content'); + } + } - const response = await hero.goto(data.cover); - const imgBuffer = await response.buffer; - //console.log('Cover image fetch done', imgBuffer.toString('base64')); - if (imgBuffer) { - data.cover_base64 = imgBuffer.toString('base64'); - data.cover_type = common.getImageType(data.cover); + //get cover image's base64 data + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + + const response = await hero.goto(data.cover); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } } - } - await hero.close(); + await hero.close(); - data.bot = this.name; - data.done = true; - }catch(error) { - console.error("Error got when request %s via hero: %s", url, error); - }; + data.bot = this.name; + data.done = true; + }catch(error) { + console.error("Error got when request %s via hero: %s", url, error); + await hero.close(); + }; - return data; - } + return data; + } }