filesite
1 year ago
2 changed files with 66 additions and 5 deletions
@ -0,0 +1,49 @@
@@ -0,0 +1,49 @@
|
||||
import Hero from '@ulixee/hero'; |
||||
import configs from '../config.mjs'; |
||||
|
||||
/**
 * Loads a page through a Hero browser session and extracts the cover image
 * and title advertised in the page's `<meta>` tags.
 */
class Xigua {
    /**
     * @param {*} [heroCloudServer] - Value handed to Hero as `connectionToCore`.
     *   When falsy, Hero is constructed with empty options.
     */
    constructor(heroCloudServer) {
        this.heroServer = heroCloudServer ? heroCloudServer : '';
    }

    /**
     * Opens `url`, waits for the page to finish loading, and reads the
     * `og:image` / `og:title` meta tags (matched on the `name` attribute).
     *
     * Errors are logged rather than thrown, so the caller always receives an
     * object; it is empty (or partially filled) when scraping failed.
     *
     * @param {string} url - Address of the page to scrape.
     * @returns {Promise<{cover?: string, title?: string}>} Extracted fields.
     */
    async scrap(url) {
        const data = {};
        // Declared outside the try so the finally clause can always release it.
        let hero = null;

        try {
            const options = {};
            if (this.heroServer) {
                options.connectionToCore = this.heroServer;
            }

            hero = new Hero(options);
            await hero.goto(url, configs.heroBotOptions);

            // Wait until all page content has finished loading.
            const tab = await hero.activeTab;
            await tab.waitForLoad('AllContentLoaded', { timeoutMs: configs.heroTabOptions.timeoutMs });

            // Parse the page's meta tags.
            const elems = await hero.detach(hero.document.querySelectorAll('meta'));
            for (const elem of elems) {
                const rawName = elem.getAttribute('name');
                if (!rawName) {
                    continue;
                }

                // Substring match on the lower-cased name; a later matching
                // tag overwrites an earlier one.
                const metaName = rawName.toLowerCase();
                if (metaName.includes('og:image')) {
                    data.cover = elem.getAttribute('content');
                } else if (metaName.includes('og:title')) {
                    data.title = elem.getAttribute('content');
                }
            }
        } catch (error) {
            console.error("Error got when request %s via hero: %s", url, error);
        } finally {
            // Always close the session, including after a failure, so that
            // browser sessions are not leaked when navigation or parsing throws.
            if (hero) {
                try {
                    await hero.close();
                } catch (error) {
                    console.error("Error got when request %s via hero: %s", url, error);
                }
            }
        }

        return data;
    }
}
||||
|
||||
export default Xigua; |
Loading…
Reference in new issue