From e6ae88b159dc149c8b0ce6b3d2c5a4fcac6e3a11 Mon Sep 17 00:00:00 2001 From: filesite Date: Mon, 11 Sep 2023 18:24:59 +0800 Subject: [PATCH] bot for bilibili done --- bot/Bilibili.mjs | 68 +++++++++++++++++++++++++++++++++++++++++++++ test/scrap_test.mjs | 22 +++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 bot/Bilibili.mjs diff --git a/bot/Bilibili.mjs b/bot/Bilibili.mjs new file mode 100644 index 0000000..fa5a6b1 --- /dev/null +++ b/bot/Bilibili.mjs @@ -0,0 +1,68 @@ +import Hero from '@ulixee/hero'; +import configs from '../config.mjs'; +import HeroBot from './HeroBot.mjs'; + +class Bilibili extends HeroBot { + + async scrap(url) { + let data = {}; + + try { + let options = { + userAgent: configs.userAgent, + viewport: configs.viewport + }; + + if (this.heroServer) { + options.connectionToCore = this.heroServer; + } + + const profile = await this.init('bilibili'); + if (profile) { + options.userProfile = profile; + } + + const hero = new Hero(options); + await hero.goto(url, configs.heroBotOptions); + + //等待所有内容加载完成 + const tab = await hero.activeTab; + await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + await hero.waitForState({ + all(assert) { + assert( + hero.document.title, + text => text != '', + ); + } + }); + + //解析网页HTML数据 + data.title = await hero.document.title; + //data.url = await hero.url; + + const elems = await hero.document.querySelectorAll('meta'); + let meta_name = ''; + for (const elem of elems) { + meta_name = await elem.getAttribute('property'); + if (!meta_name) {continue;} + meta_name = meta_name.toLowerCase(); + //console.log('meta', meta_name); + if (meta_name.indexOf('og:image') > -1) { + data.cover = await elem.getAttribute('content'); + }else if (meta_name.indexOf('og:title') > -1) { + data.title = await elem.getAttribute('content'); + } + } + + await hero.close(); + }catch(error) { + console.error("Error got when request %s via hero: %s", url, error); + } + + return data; + } 
+ +} + +export default Bilibili; diff --git a/test/scrap_test.mjs b/test/scrap_test.mjs index 3d8f29c..c011ed1 100644 --- a/test/scrap_test.mjs +++ b/test/scrap_test.mjs @@ -1,6 +1,7 @@ import Douyin from '../bot/Douyin.mjs'; import Kuaishou from '../bot/Kuaishou.mjs'; import Xigua from '../bot/Xigua.mjs'; +import Bilibili from '../bot/Bilibili.mjs'; import configs from '../config.mjs'; (async () => { @@ -72,6 +73,27 @@ import configs from '../config.mjs'; console.log("解析结果:\n%s", JSON.stringify(data)); break; + + + case 'bilibili': + //Bilibili test + url = 'https://www.bilibili.com/video/BV1ep4y1J75y/?share_source=copy_web&vd_source=00bead49a4c2df31bbf3e609d7d95899'; //pc + url = 'https://b23.tv/Lo0jIEt'; //mob + + configs.heroTabOptions.timeoutMs = 20000; //timeout for all content to finish loading + + configs.userAgent = configs.userAgents.mac_chrome; + configs.viewport = configs.viewports.pc; + + console.log('Hero配置', configs); + + const bilibili = new Bilibili(heroCloudServer); + console.log('请求中: %s ...', url); + data = await bilibili.scrap(url); + console.log("解析结果:\n%s", JSON.stringify(data)); + + break; + + } process.exit(0);