/**
 * Scraper bot for Xigua video pages.
 *
 * Opens the given URL in a Ulixee Hero browser session and extracts the
 * cover image and title from the page's `<meta>` tags.
 */
class Xigua {
    /**
     * @param {string} [heroCloudServer] - Address handed to Hero as
     *   `connectionToCore`. When falsy, Hero is created with empty options.
     */
    constructor(heroCloudServer) {
        this.heroServer = heroCloudServer || '';
    }

    /**
     * Load `url` and collect metadata from its `<meta name="...">` tags.
     *
     * This method is best-effort: any error raised while loading or parsing
     * is logged with `console.error` and NOT rethrown, so callers receive
     * whatever was collected up to that point (possibly an empty object).
     *
     * @param {string} url - Page address to open.
     * @returns {Promise<{cover?: string, title?: string}>} `cover` comes from
     *   a meta tag whose name contains `og:image`, `title` from one whose
     *   name contains `og:title` (case-insensitive substring match).
     */
    async scrap(url) {
        const data = {};
        // Declared outside the `try` so the `finally` block can release it.
        let hero = null;

        try {
            const options = {};
            if (this.heroServer) {
                options.connectionToCore = this.heroServer;
            }

            hero = new Hero(options);
            await hero.goto(url, configs.heroBotOptions);

            // Wait until all page content has finished loading.
            const tab = await hero.activeTab;
            await tab.waitForLoad('AllContentLoaded', { timeoutMs: configs.heroTabOptions.timeoutMs });

            // Parse the page's <meta> elements (obtained through hero.detach).
            const elems = await hero.detach(hero.document.querySelectorAll('meta'));
            for (const elem of elems) {
                const rawName = elem.getAttribute('name');
                if (!rawName) {
                    // Meta tags without a `name` attribute carry nothing we read.
                    continue;
                }

                const metaName = rawName.toLowerCase();
                if (metaName.includes('og:image')) {
                    data.cover = elem.getAttribute('content');
                } else if (metaName.includes('og:title')) {
                    data.title = elem.getAttribute('content');
                }
            }
        } catch (error) {
            console.error("Error got when request %s via hero: %s", url, error);
        } finally {
            // Always close the session, including after a failed navigation or
            // load timeout; otherwise the Hero instance is never released.
            if (hero) {
                try {
                    await hero.close();
                } catch (error) {
                    // Keep the best-effort contract: a failing close is logged,
                    // never thrown, and does not discard the collected data.
                    console.error("Error got when request %s via hero: %s", url, error);
                }
            }
        }

        return data;
    }
}

export default Xigua;
{ - configs.heroTabOptions.timeoutMs = 10000; //所有内容加载完成超时 - console.log('Hero配置', configs); const heroCloudServer = 'ws://192.168.3.13:1818'; //抖音测试 @@ -15,10 +14,23 @@ import configs from '../config.mjs'; //console.log("解析结果:\n%s", JSON.stringify(data)); //快手测试 - const kuaishou = new Kuaishou(heroCloudServer); - const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7'; + //configs.heroTabOptions.timeoutMs = 10000; //所有内容加载完成超时 + //console.log('Hero配置', configs); + //const kuaishou = new Kuaishou(heroCloudServer); + //const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7'; + //console.log('请求中: %s ...', url); + //const data = await kuaishou.scrap(url); + //console.log("解析结果:\n%s", JSON.stringify(data)); + + //西瓜测试 + const url = 'https://www.ixigua.com/7092326495246516749'; + configs.heroBotOptions.referrer = url; + configs.heroBotOptions.userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'; + console.log('Hero配置', configs); + + const xigua = new Xigua(heroCloudServer); console.log('请求中: %s ...', url); - const data = await kuaishou.scrap(url); + const data = await xigua.scrap(`${url}?wid_try=1`); console.log("解析结果:\n%s", JSON.stringify(data)); })();