Browse Source

bot kuaishou support share url

master
filesite 1 year ago
parent
commit
2ba9473df5
  1. 1
      .gitignore
  2. 1
      README.md
  3. 18
      bot/Kuaishou.mjs
  4. 6
      config.mjs
  5. 33
      test/scrap_test.mjs
  6. 5
      tmp/README.md

1
.gitignore vendored

@ -1,2 +1,3 @@ @@ -1,2 +1,3 @@
package-lock.json
node_modules/
tmp/profile*.json

1
README.md

@ -19,6 +19,7 @@ Hero scripts of machete. @@ -19,6 +19,7 @@ Hero scripts of machete.
* bot - 针对各大平台的网页HTML解析类
* bypass - 针对各大平台的常用域名收集
* test - 类库测试代码
* tmp - 临时文件保存目录
* install_cloud.sh - hero服务端安装(非必需)
* install_hero.sh - hero客户端安装

18
bot/Kuaishou.mjs

@ -1,5 +1,7 @@ @@ -1,5 +1,7 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import fs from 'node:fs';
import path from 'node:path';
class Kuaishou {
constructor(heroCloudServer) {
@ -11,10 +13,19 @@ class Kuaishou { @@ -11,10 +13,19 @@ class Kuaishou {
try {
let options = {};
if (this.heroServer) {
options.connectionToCore = this.heroServer;
}
const profilePath = path.resolve('../tmp/', 'profile_kuaishou.json');
let saveProfile = false;
if (fs.existsSync(profilePath) != false) {
const json = fs.readFileSync(profilePath, { encoding: 'utf8' });
options.userProfile = JSON.parse(json);
saveProfile = true;
}
const hero = new Hero(options);
await hero.goto(url, configs.heroBotOptions);
@ -22,8 +33,15 @@ class Kuaishou { @@ -22,8 +33,15 @@ class Kuaishou {
const tab = await hero.activeTab;
await tab.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
if (saveProfile) {
const latestUserProfile = await hero.exportUserProfile();
fs.writeFileSync(profilePath, JSON.stringify(latestUserProfile, null, 2));
}
//解析网页HTML数据
data.title = await hero.document.title;
//data.url = await hero.url;
const elem = await hero.detach( hero.document.querySelector('.video-container-player') );
data.cover = elem.getAttribute('poster');

6
config.mjs

@ -3,8 +3,8 @@ export default { @@ -3,8 +3,8 @@ export default {
//请求参数
heroBotOptions: {
timeoutMs: 10000,
referrer: 'https://wechat.com',
userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
referrer: '',
userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C202 MicroMessenger/6.6.1 NetType/4G Language/zh_CN'
},
//网页tab参数
@ -15,7 +15,9 @@ export default { @@ -15,7 +15,9 @@ export default {
//常用浏览器user-agent
userAgents: {
iphone_chrome: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
iphone_wechat: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C202 MicroMessenger/6.6.1 NetType/4G Language/zh_CN',
mac_chrome: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36',
android_wechat: 'Mozilla/5.0 (Linux; Android 7.1.1; OD103 Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 MicroMessenger/6.6.1.1220(0x26060135) NetType/4G Language/zh_CN',
}
};

33
test/scrap_test.mjs

@ -14,23 +14,28 @@ import configs from '../config.mjs'; @@ -14,23 +14,28 @@ import configs from '../config.mjs';
//console.log("解析结果:\n%s", JSON.stringify(data));
//快手测试
//configs.heroTabOptions.timeoutMs = 10000; //所有内容加载完成超时
//console.log('Hero配置', configs);
//const kuaishou = new Kuaishou(heroCloudServer);
//const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7';
//console.log('请求中: %s ...', url);
//const data = await kuaishou.scrap(url);
//console.log("解析结果:\n%s", JSON.stringify(data));
//西瓜测试
const url = 'https://www.ixigua.com/7092326495246516749';
configs.heroBotOptions.referrer = url;
configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome;
configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时
configs.heroBotOptions.userAgent = configs.userAgents.iphone_wechat;
//configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome;
console.log('Hero配置', configs);
const xigua = new Xigua(heroCloudServer);
const kuaishou = new Kuaishou(heroCloudServer);
//const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7';
const url = 'https://v.kuaishou.com/7zwqe6';
//const url = 'https://www.kuaishou.com/';
console.log('请求中: %s ...', url);
const data = await xigua.scrap(`${url}?wid_try=1`);
const data = await kuaishou.scrap(url);
console.log("解析结果:\n%s", JSON.stringify(data));
//西瓜测试
//const url = 'https://www.ixigua.com/7092326495246516749';
//configs.heroBotOptions.referrer = url;
//configs.heroBotOptions.userAgent = configs.userAgents.mac_chrome;
//console.log('Hero配置', configs);
//const xigua = new Xigua(heroCloudServer);
//console.log('请求中: %s ...', url);
//const data = await xigua.scrap(`${url}?wid_try=1`);
//console.log("解析结果:\n%s", JSON.stringify(data));
})();

5
tmp/README.md

@ -0,0 +1,5 @@ @@ -0,0 +1,5 @@
## 临时文件保存目录
* profile_xxx.json - 浏览器客户端缓存数据,如:cookie、localStorage 等
Loading…
Cancel
Save