diff --git a/.gitignore b/.gitignore index 15813be..b939fe3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ package-lock.json node_modules/ +tmp/profile*.json diff --git a/README.md b/README.md index 955a54b..9b6ab01 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ Hero scripts of machete. * bot - 针对各大平台的网页HTML解析类 * bypass - 针对各大平台的常用域名收集 * test - 类库测试代码 +* tmp - 临时文件保存目录 * install_cloud.sh - hero服务端安装(非必需) * install_hero.sh - hero客户端安装 diff --git a/bot/Kuaishou.mjs b/bot/Kuaishou.mjs index c205670..abd035a 100644 --- a/bot/Kuaishou.mjs +++ b/bot/Kuaishou.mjs @@ -1,5 +1,7 @@ import Hero from '@ulixee/hero'; import configs from '../config.mjs'; +import fs from 'node:fs'; +import path from 'node:path'; class Kuaishou { constructor(heroCloudServer) { @@ -11,10 +13,19 @@ class Kuaishou { try { let options = {}; + if (this.heroServer) { options.connectionToCore = this.heroServer; } + const profilePath = path.resolve('../tmp/', 'profile_kuaishou.json'); + let saveProfile = false; + if (fs.existsSync(profilePath) != false) { + const json = fs.readFileSync(profilePath, { encoding: 'utf8' }); + options.userProfile = JSON.parse(json); + saveProfile = true; + } + const hero = new Hero(options); await hero.goto(url, configs.heroBotOptions); @@ -22,8 +33,15 @@ class Kuaishou { const tab = await hero.activeTab; await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + if (saveProfile) { + const latestUserProfile = await hero.exportUserProfile(); + fs.writeFileSync(profilePath, JSON.stringify(latestUserProfile, null, 2)); + } + //解析网页HTML数据 data.title = await hero.document.title; + //data.url = await hero.url; + const elem = await hero.detach( hero.document.querySelector('.video-container-player') ); data.cover = elem.getAttribute('poster'); diff --git a/config.mjs b/config.mjs index a759672..ee5512e 100644 --- a/config.mjs +++ b/config.mjs @@ -3,8 +3,8 @@ export default { //请求参数 heroBotOptions: { timeoutMs: 10000, - referrer: 'https://wechat.com', - userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1' + referrer: '', + userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C202 MicroMessenger/6.6.1 NetType/4G Language/zh_CN' }, //网页tab参数 @@ -15,7 +15,9 @@ export default { //常用浏览器user-agent userAgents: { iphone_chrome: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1', + iphone_wechat: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C202 MicroMessenger/6.6.1 NetType/4G Language/zh_CN', mac_chrome: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', + android_wechat: 'Mozilla/5.0 (Linux; Android 7.1.1; OD103 Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.6.1.1220(0x26060135) NetType/4G Language/zh_CN', } }; diff --git a/test/scrap_test.mjs b/test/scrap_test.mjs index ac8d5d9..522fc5c 100644 --- a/test/scrap_test.mjs +++ b/test/scrap_test.mjs @@ -14,23 +14,28 @@ import configs from '../config.mjs'; //console.log("解析结果:\n%s", JSON.stringify(data)); //快手测试 - //configs.heroTabOptions.timeoutMs = 10000; //所有内容加载完成超时 - //console.log('Hero配置', configs); - //const kuaishou = new Kuaishou(heroCloudServer); - //const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7'; - //console.log('请求中: %s ...', url); - //const data = await kuaishou.scrap(url); - //console.log("解析结果:\n%s", JSON.stringify(data)); - - //西瓜测试 - const url = 'https://www.ixigua.com/7092326495246516749'; - configs.heroBotOptions.referrer = url; - configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome; + configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时 + configs.heroBotOptions.userAgent = configs.userAgents.iphone_wechat; + //configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome; console.log('Hero配置', configs); - const xigua = new Xigua(heroCloudServer); + const kuaishou = new Kuaishou(heroCloudServer); + //const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7'; + const url = 'https://v.kuaishou.com/7zwqe6'; + //const url = 'https://www.kuaishou.com/'; console.log('请求中: %s ...', url); - const data = await xigua.scrap(`${url}?wid_try=1`); + const data = await kuaishou.scrap(url); console.log("解析结果:\n%s", JSON.stringify(data)); + //西瓜测试 + //const url = 'https://www.ixigua.com/7092326495246516749'; + //configs.heroBotOptions.referrer = url; + //configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome; + //console.log('Hero配置', configs); + + //const xigua = new Xigua(heroCloudServer); + //console.log('请求中: %s ...', url); + //const data = await xigua.scrap(`${url}?wid_try=1`); + //console.log("解析结果:\n%s", JSON.stringify(data)); + })(); diff --git a/tmp/README.md b/tmp/README.md new file mode 100644 index 0000000..2b4dbbd --- /dev/null +++ b/tmp/README.md @@ -0,0 +1,5 @@ + +## 临时文件保存目录 + +* profile_xxx.json - 浏览器客户端缓存数据,如:cookie、localstorage等 +