diff --git a/bot/Bilibili.mjs b/bot/Bilibili.mjs
index 402a7b5..b323d45 100644
--- a/bot/Bilibili.mjs
+++ b/bot/Bilibili.mjs
@@ -87,6 +87,8 @@ class Bilibili extends HeroBot {
         }catch(error) {
             console.error("Error got when request %s via hero: %s", url, error);
             await hero.close();
+            //Delete the profile file before any retry
+            await this.deleteProfile();
         }
 
         return data;
diff --git a/bot/Douyin.mjs b/bot/Douyin.mjs
index ccad328..4f17d82 100644
--- a/bot/Douyin.mjs
+++ b/bot/Douyin.mjs
@@ -9,8 +9,10 @@ class Douyin extends HeroBot {
         let data = {url: url, done: false};
 
         //use iphone
-        configs.userAgent = configs.userAgents.iphone_safari;
-        configs.viewport = configs.viewports.mob;
+        if (this.ua == 'mob') {
+            configs.userAgent = configs.userAgents.iphone_safari;
+            configs.viewport = configs.viewports.mob;
+        }
 
         let options = {
             userAgent: configs.userAgent,               //default mac os
@@ -42,48 +44,54 @@ class Douyin extends HeroBot {
             const tab = await hero.activeTab;
 
             //for mob
-            await tab.waitForLoad('DomContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
-
-            //for pc
-            //await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
-            //await hero.waitForState({
-            //    all(assert) {
-            //        assert(
-            //            hero.document.title,
-            //            text => text != '',
-            //        );
-            //    }
-            //}, {timeoutMs: configs.heroTabOptions.timeoutMs});
+            if (this.ua == 'mob') {
+                await tab.waitForLoad('DomContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
+            }else {
+                //for pc
+                await tab.waitForLoad('DomContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});     //AllContentLoaded, DomContentLoaded
+                await hero.waitForState({
+                    all(assert) {
+                        assert(
+                            hero.document.title,
+                            text => text != '',
+                        );
+                    }
+                }, {timeoutMs: configs.heroTabOptions.timeoutMs});
+            }
+
 
             //解析网页HTML数据
             data.title = await hero.document.title;
 
-            //pc版网页解析
-            /*
-            const elems = await hero.document.querySelectorAll('meta');
-            let meta_name = '';
-            for (const elem of elems) {
-                meta_name = await elem.getAttribute('name');
-                if (!meta_name) {continue;}
-                meta_name = meta_name.toLowerCase();
-                //console.log('meta', meta_name);
-                if (meta_name.indexOf('video_cover_image_url') > -1) {
-                    data.cover = await elem.getAttribute('content');
-                }else if (meta_name.indexOf('video_title') > -1) {
-                    data.title = await elem.getAttribute('content');
+            if (this.ua == 'mob') {
+                //Parse the mobile page
+                const imgElem = await hero.querySelector('.video-container img.poster');
+                if (!imgElem) {
+                    console.error('HTML解析出错,找不到封面图', data);
+                    await hero.close();
+                    //Delete the profile file before any retry
+                    await this.deleteProfile();
+                    this.ua = 'pc';     //switch to pc mode for the retry
+                    return false;
+                }
+
+                data.cover = await imgElem.src;
+            }else {
+                //Parse the pc page
+                const elems = await hero.document.querySelectorAll('meta');
+                let meta_name = '';
+                for (const elem of elems) {
+                    meta_name = await elem.getAttribute('name');
+                    if (!meta_name) {continue;}
+                    meta_name = meta_name.toLowerCase();
+                    if (meta_name.indexOf('video_cover_image_url') > -1) {
+                        data.cover = await elem.getAttribute('content');
+                    }else if (meta_name.indexOf('video_title') > -1) {
+                        data.title = await elem.getAttribute('content');
+                    }
                 }
-            }
-            */
-
-            //手机版网页解析
-            const imgElem = await hero.querySelector('.video-container img.poster');
-            if (!imgElem) {
-                console.error('HTML解析出错,找不到封面图', data);
-                await hero.close();
-                return false;
             }
 
-            data.cover = await imgElem.src;
 
             //get cover image's base64 data
             if (typeof(data.cover) != 'undefined' && data.cover) {
@@ -105,6 +113,11 @@ class Douyin extends HeroBot {
         }catch(error) {
             console.error("Error got when request %s via hero: %s", url, error);
             await hero.close();
+
+            //Delete the profile file before any retry
+            await this.deleteProfile();
+            //Toggle the browser mode
+            this.ua = this.ua == 'mob' ? 'pc' : 'mob';
         }
 
         return data;
diff --git a/bot/HeroBot.mjs b/bot/HeroBot.mjs
index 90eb688..7e7005a 100644
--- a/bot/HeroBot.mjs
+++ b/bot/HeroBot.mjs
@@ -1,5 +1,6 @@
 import Hero from '@ulixee/hero';
 import fs from 'node:fs';
+import {unlink} from 'node:fs/promises';
 import path from 'node:path';
 import { fileURLToPath } from 'url';
 import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
@@ -20,6 +21,9 @@ class HeroBot {
 
         const __filename = fileURLToPath(import.meta.url);
         this.root = path.dirname(__filename);
+
+        //Track the browser mode so it can be switched when needed
+        this.ua = 'pc';
     }
 
     //返回profile对象
@@ -94,6 +98,28 @@ class HeroBot {
         return true;
     }
 
+    //Delete this bot's profile file, ../tmp/profile_{name}.json resolved against this.root.
+    //Resolves to true once the file is absent (removed, or not there to begin with);
+    //resolves to false when this.name is empty or when removing the file throws.
+    async deleteProfile() {
+        if (!this.name) {return false;}
+
+        const botName = this.name;
+
+        try {
+            const profilePath = path.resolve(this.root, '../tmp/', `profile_${botName}.json`);
+            if (fs.existsSync(profilePath)) {
+                //unlink() resolves with no value, so its result is not returned; success falls through to `return true`
+                await unlink(profilePath);
+            }
+        }catch(error) {
+            console.error("Error got when delete profile of %s, error detail:\n%s", botName, error);
+            return false;
+        }
+
+        return true;
+    }
+
     //处理name为空的cookie
     fixCookies(profile) {
         let fixedProfile = profile;
diff --git a/bot/Kuaishou.mjs b/bot/Kuaishou.mjs
index 397099b..311a184 100644
--- a/bot/Kuaishou.mjs
+++ b/bot/Kuaishou.mjs
@@ -68,6 +68,8 @@ class Kuaishou extends HeroBot {
         }catch(error) {
             console.error("Error got when request %s via hero: %s", url, error);
             await hero.close();
+            //Delete the profile file before any retry
+            await this.deleteProfile();
         };
 
         return data;
diff --git a/bot/Xigua.mjs b/bot/Xigua.mjs
index aa4b9ed..4110279 100644
--- a/bot/Xigua.mjs
+++ b/bot/Xigua.mjs
@@ -41,7 +41,6 @@ class Xigua extends HeroBot {
 
             //解析网页HTML数据
             data.title = await hero.document.title;
-            //data.url = await hero.url;
 
             const elems = await hero.document.querySelectorAll('meta');
             let meta_name = '';
@@ -57,6 +56,18 @@ class Xigua extends HeroBot {
                 }
             }
 
+            //No cover found in the meta tags: fall back to the background image of the xg-poster element
+            if (typeof(data.cover) == 'undefined' || !data.cover) {
+                const imgTag = await tab.querySelector('xg-poster');
+                //Guard before reading the style in case no xg-poster element was found
+                const backgroundCss = imgTag ? await imgTag.style.backgroundImage : '';
+                //Capture only what sits inside url(...), dropping the optional matching quote on both sides
+                const urlMatch = /url\(\s*(["']?)(.+?)\1\s*\)/i.exec(backgroundCss || '');
+                if (urlMatch) {
+                    data.cover = urlMatch[2];
+                }
+            }
+
             //get cover image's base64 data
             if (typeof(data.cover) != 'undefined' && data.cover) {
                 data.cover = common.getAbsoluteUrl(data.cover);
@@ -77,6 +88,8 @@ class Xigua extends HeroBot {
         }catch(error) {
             console.error("Error got when request %s via hero: %s", url, error);
             await hero.close();
+            //Delete the profile file before any retry
+            await this.deleteProfile();
         };
 
         return data;
diff --git a/test/scrap_test.mjs b/test/scrap_test.mjs
index ea9102d..a9c798c 100644
--- a/test/scrap_test.mjs
+++ b/test/scrap_test.mjs
@@ -61,7 +61,7 @@ import getConfigs from '../config.mjs';
         case 'xigua':
             //西瓜测试
             url = 'https://v.ixigua.com/ieUaqrFN/';       //mobile
-            url = 'https://www.ixigua.com/7248225527335813636';     //pc
+            url = 'https://www.ixigua.com/7343928492197118518';     //pc
 
             configs.heroBotOptions.referrer = url;
             configs.userAgent = configs.userAgents.mac_chrome;