Browse Source

bot for xigua done

master
filesite 1 year ago
parent
commit
093424c6e0
  1. 49
      bot/Xigua.mjs
  2. 22
      test/scrap_test.mjs

49
bot/Xigua.mjs

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
import Hero from '@ulixee/hero';
import configs from '../config.mjs';
/**
 * Scraper that loads a page through a Hero browser session and extracts
 * the cover image and title from the page's `<meta>` tags.
 */
class Xigua {
  /**
   * @param {string} [heroCloudServer] - Value handed to Hero as
   *   `connectionToCore`. When falsy, Hero is constructed without that option.
   */
  constructor(heroCloudServer) {
    this.heroServer = heroCloudServer ? heroCloudServer : '';
  }

  /**
   * Load `url` and collect metadata from `<meta name="...">` elements.
   *
   * A meta whose lowercased `name` contains `og:image` sets `cover`; one that
   * contains `og:title` sets `title` (each from the element's `content`
   * attribute). Errors are logged and never thrown, so the result may be
   * empty or partial.
   *
   * @param {string} url - Page address to open.
   * @returns {Promise<{cover?: string, title?: string}>} Extracted fields.
   */
  async scrap(url) {
    const data = {};
    // Declared outside `try` so `finally` can release it on every path.
    let hero;
    try {
      const options = {};
      if (this.heroServer) {
        options.connectionToCore = this.heroServer;
      }
      hero = new Hero(options);
      await hero.goto(url, configs.heroBotOptions);

      // Wait until all page content has finished loading.
      const tab = await hero.activeTab;
      await tab.waitForLoad('AllContentLoaded', { timeoutMs: configs.heroTabOptions.timeoutMs });

      // Parse the page's HTML: inspect every <meta> element.
      const elems = await hero.detach(hero.document.querySelectorAll('meta'));
      for (const elem of elems) {
        const rawName = elem.getAttribute('name');
        if (!rawName) {
          continue;
        }
        const metaName = rawName.toLowerCase();
        if (metaName.indexOf('og:image') > -1) {
          data.cover = elem.getAttribute('content');
        } else if (metaName.indexOf('og:title') > -1) {
          data.title = elem.getAttribute('content');
        }
      }
    } catch (error) {
      console.error("Error got when request %s via hero: %s", url, error);
    } finally {
      // Always close the session, including after a failed navigation or
      // load timeout; otherwise the browser session would be leaked.
      if (hero) {
        try {
          await hero.close();
        } catch (error) {
          // Keep the method's "never rejects" contract: log and carry on.
          console.error("Error got when request %s via hero: %s", url, error);
        }
      }
    }
    return data;
  }
}
export default Xigua;

22
test/scrap_test.mjs

@ -1,10 +1,9 @@ @@ -1,10 +1,9 @@
import Douyin from '../bot/Douyin.mjs';
import Kuaishou from '../bot/Kuaishou.mjs';
import Xigua from '../bot/Xigua.mjs';
import configs from '../config.mjs';
(async () => {
configs.heroTabOptions.timeoutMs = 10000; //所有内容加载完成超时
console.log('Hero配置', configs);
const heroCloudServer = 'ws://192.168.3.13:1818';
//抖音测试
@ -15,10 +14,23 @@ import configs from '../config.mjs'; @@ -15,10 +14,23 @@ import configs from '../config.mjs';
//console.log("解析结果:\n%s", JSON.stringify(data));
//快手测试
const kuaishou = new Kuaishou(heroCloudServer);
const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7';
//configs.heroTabOptions.timeoutMs = 10000; //所有内容加载完成超时
//console.log('Hero配置', configs);
//const kuaishou = new Kuaishou(heroCloudServer);
//const url = 'https://www.kuaishou.com/f/X8FTguiIjZQVwE7';
//console.log('请求中: %s ...', url);
//const data = await kuaishou.scrap(url);
//console.log("解析结果:\n%s", JSON.stringify(data));
//西瓜测试
const url = 'https://www.ixigua.com/7092326495246516749';
configs.heroBotOptions.referrer = url;
configs.heroBotOptions.userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36';
console.log('Hero配置', configs);
const xigua = new Xigua(heroCloudServer);
console.log('请求中: %s ...', url);
const data = await kuaishou.scrap(url);
const data = await xigua.scrap(`${url}?wid_try=1`);
console.log("解析结果:\n%s", JSON.stringify(data));
})();

Loading…
Cancel
Save