filesite
1 year ago
2 changed files with 90 additions and 0 deletions
@ -0,0 +1,68 @@
@@ -0,0 +1,68 @@
|
||||
import Hero from '@ulixee/hero'; |
||||
import configs from '../config.mjs'; |
||||
import HeroBot from './HeroBot.mjs'; |
||||
|
||||
/**
 * Scraper for Bilibili pages built on top of HeroBot.
 *
 * Opens a page with a Hero browser session and extracts the page title and
 * cover image from the document title and Open Graph `<meta>` tags.
 */
class Bilibili extends HeroBot {

    /**
     * Load `url` in a Hero session and collect basic page metadata.
     *
     * Errors are logged and never rethrown: on failure the method resolves
     * with whatever fields were collected before the error (possibly none).
     *
     * @param {string} url - Address of the page to scrape.
     * @returns {Promise<{title?: string, cover?: string}>} `title` comes from
     *   the document title, overridden by an `og:title` meta tag when present;
     *   `cover` comes from an `og:image` meta tag.
     */
    async scrap(url) {
        const data = {};
        // Declared outside the try block so the finally clause can release it.
        let hero = null;

        try {
            const options = {
                userAgent: configs.userAgent,
                viewport: configs.viewport,
            };

            // Only set connectionToCore when this.heroServer is truthy;
            // presumably it is populated by HeroBot — verify against the base class.
            if (this.heroServer) {
                options.connectionToCore = this.heroServer;
            }

            // init() may resolve to a falsy value, in which case no user profile is set.
            const profile = await this.init('bilibili');
            if (profile) {
                options.userProfile = profile;
            }

            hero = new Hero(options);
            await hero.goto(url, configs.heroBotOptions);

            // Wait until all content has finished loading.
            const tab = await hero.activeTab;
            await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});

            // Additionally wait for the document title to become non-empty.
            await hero.waitForState({
                all(assert) {
                    assert(
                        hero.document.title,
                        text => text !== '',
                    );
                }
            });

            // Parse data out of the page HTML.
            data.title = await hero.document.title;

            const metaElems = await hero.document.querySelectorAll('meta');
            for (const elem of metaElems) {
                const property = await elem.getAttribute('property');
                if (!property) {
                    continue;
                }

                // Substring match on the lower-cased property name (same as the
                // original indexOf check).
                // NOTE(review): this also matches longer names such as
                // "og:image:width" — confirm whether an exact match is wanted.
                const metaName = property.toLowerCase();
                if (metaName.includes('og:image')) {
                    data.cover = await elem.getAttribute('content');
                } else if (metaName.includes('og:title')) {
                    data.title = await elem.getAttribute('content');
                }
            }
        } catch (error) {
            console.error("Error got when request %s via hero: %s", url, error);
        } finally {
            // Always release the Hero session, including when navigation or
            // parsing failed; previously it was only closed on the success path.
            if (hero) {
                try {
                    await hero.close();
                } catch (closeError) {
                    // Keep the best-effort contract: a failed close is logged, not thrown.
                    console.error("Error got when closing hero for %s: %s", url, closeError);
                }
            }
        }

        return data;
    }

}
||||
|
||||
export default Bilibili; |
Loading…
Reference in new issue