Browse Source

bot for bilibili done

master
filesite 1 year ago
parent
commit
e6ae88b159
  1. 68
      bot/Bilibili.mjs
  2. 22
      test/scrap_test.mjs

68
bot/Bilibili.mjs

@ -0,0 +1,68 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
/**
 * Scraper bot for Bilibili video pages, driven by a Ulixee Hero browser session.
 *
 * Relies on `this.heroServer` and `this.init()` provided by the HeroBot base
 * class (not visible in this file — presumably the remote Hero core address
 * and a user-profile loader; confirm against HeroBot.mjs).
 */
class Bilibili extends HeroBot {
    /**
     * Load a Bilibili page and extract its title and cover image.
     *
     * Failures are logged via `console.error` and never thrown; whatever was
     * collected before the failure is still returned.
     *
     * @param {string} url - Page URL to load.
     * @returns {Promise<{title?: string, cover?: string}>} Scraped fields;
     *   an empty object when nothing could be read.
     */
    async scrap(url) {
        const data = {};
        // Declared outside the try block so the session can be released in `finally`.
        let hero;
        try {
            const options = {
                userAgent: configs.userAgent,
                viewport: configs.viewport
            };
            if (this.heroServer) {
                options.connectionToCore = this.heroServer;
            }
            const profile = await this.init('bilibili');
            if (profile) {
                options.userProfile = profile;
            }

            hero = new Hero(options);
            await hero.goto(url, configs.heroBotOptions);

            // Wait until all page content has finished loading.
            const tab = await hero.activeTab;
            await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
            // Additionally wait until the document exposes a non-empty title.
            await hero.waitForState({
                all(assert) {
                    assert(
                        hero.document.title,
                        text => text !== '',
                    );
                }
            });

            // Parse the page: start from the document title, then let the
            // Open Graph meta tags override/add fields.
            data.title = await hero.document.title;
            const elems = await hero.document.querySelectorAll('meta');
            for (const elem of elems) {
                const property = await elem.getAttribute('property');
                if (!property) {continue;}
                const metaName = property.toLowerCase();
                // NOTE(review): substring match also hits structured properties
                // such as `og:image:width`; the last matching tag wins. Kept
                // as-is to preserve existing behavior — confirm whether an
                // exact match is intended.
                if (metaName.includes('og:image')) {
                    data.cover = await elem.getAttribute('content');
                } else if (metaName.includes('og:title')) {
                    data.title = await elem.getAttribute('content');
                }
            }
        } catch (error) {
            console.error("Error got when request %s via hero: %s", url, error);
        } finally {
            // Always release the browser session, including when navigation or
            // parsing failed; otherwise the Hero instance would leak.
            if (hero) {
                try {
                    await hero.close();
                } catch (closeError) {
                    console.error("Error got when request %s via hero: %s", url, closeError);
                }
            }
        }
        return data;
    }
}
export default Bilibili;

22
test/scrap_test.mjs

@ -1,6 +1,7 @@
import Douyin from '../bot/Douyin.mjs'; import Douyin from '../bot/Douyin.mjs';
import Kuaishou from '../bot/Kuaishou.mjs'; import Kuaishou from '../bot/Kuaishou.mjs';
import Xigua from '../bot/Xigua.mjs'; import Xigua from '../bot/Xigua.mjs';
import Bilibili from '../bot/Bilibili.mjs';
import configs from '../config.mjs'; import configs from '../config.mjs';
(async () => { (async () => {
@ -72,6 +73,27 @@ import configs from '../config.mjs';
console.log("解析结果:\n%s", JSON.stringify(data)); console.log("解析结果:\n%s", JSON.stringify(data));
break; break;
case 'bilibili':
//Bilibili test
url = 'https://www.bilibili.com/video/BV1ep4y1J75y/?share_source=copy_web&vd_source=00bead49a4c2df31bbf3e609d7d95899'; //pc
url = 'https://b23.tv/Lo0jIEt'; //mob
configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时
configs.userAgent = configs.userAgents.mac_chrome;
configs.viewport = configs.viewports.pc;
console.log('Hero配置', configs);
const bilibili = new Bilibili(heroCloudServer);
console.log('请求中: %s ...', url);
data = await bilibili.scrap(url);
console.log("解析结果:\n%s", JSON.stringify(data));
break;
} }
process.exit(0); process.exit(0);

Loading…
Cancel
Save