Browse Source

douyin livestream support

master
filesite 6 months ago
parent
commit
70bf6ebf25
  1. 20
      bot/Douyin.mjs
  2. 13
      config.mjs
  3. 5
      lib/common.mjs
  4. 6
      spider.mjs
  5. 3
      test/cloud_test.mjs
  6. 40
      test/douyin_test.mjs
  7. 10
      test/scrap_test.mjs

20
bot/Douyin.mjs

@ -59,13 +59,27 @@ class Douyin extends HeroBot {
}, {timeoutMs: configs.heroTabOptions.timeoutMs}); }, {timeoutMs: configs.heroTabOptions.timeoutMs});
} }
let rnd_secods = 10 + parseInt(Math.random() * 10);
console.log("Sleep %s seconds...", rnd_secods);
await common.delay(rnd_secods);
//解析网页HTML数据 //解析网页HTML数据
data.title = await hero.document.title; data.title = await hero.document.title;
if (this.ua == 'mob') { if (this.ua == 'mob') {
//手机版网页解析 //手机版网页解析
const imgElem = await hero.querySelector('.video-container img.poster'); let imgElem = await hero.querySelector('.video-container img.poster');
let elType = 'image';
if (!imgElem) {
//尝试去抓取video的poster属性
imgElem = await hero.querySelector('.xgplayer video');
elType = 'video';
if (!imgElem) {
//尝试获取用户头像作为封面图,兼容直播页面
imgElem = await hero.querySelector('.avatar-component-avatar-container img');
elType = 'image';
if (!imgElem) { if (!imgElem) {
console.error('HTML解析出错,找不到封面图', data); console.error('HTML解析出错,找不到封面图', data);
await hero.close(); await hero.close();
@ -73,8 +87,10 @@ class Douyin extends HeroBot {
await this.deleteProfile(); await this.deleteProfile();
return false; return false;
} }
}
}
data.cover = await imgElem.src; data.cover = elType == 'image' ? await imgElem.src : await imgElem.poster;
}else { }else {
//pc版网页解析 //pc版网页解析
const elems = await hero.document.querySelectorAll('meta'); const elems = await hero.document.querySelectorAll('meta');

13
config.mjs

@ -38,10 +38,10 @@ let configs = {
//可选项参考官方文档:https://ulixee.org/docs/hero/basic-client/hero //可选项参考官方文档:https://ulixee.org/docs/hero/basic-client/hero
botOptions: { botOptions: {
showChrome: false, showChrome: true,
showChromeInteractions: false, showChromeInteractions: true,
showDevtools: false, showDevtools: true,
showChromeAlive: false, showChromeAlive: true,
}, },
viewports: { viewports: {
@ -76,10 +76,9 @@ let configs = {
}; };
async function getConfig() { async function getConfig(configFile) {
//自定义JSON格式配置文件支持 //自定义JSON格式配置文件支持
if (process.argv.length >= 3) { if (typeof(configFile) != 'undefined' && configFile) {
let configFile = process.argv[2];
let myConfigs = await common.loadCustomizeConfig(configFile); let myConfigs = await common.loadCustomizeConfig(configFile);
if (myConfigs) { if (myConfigs) {
configs = common.mergeConfigs(myConfigs, configs); configs = common.mergeConfigs(myConfigs, configs);

5
lib/common.mjs

@ -1,6 +1,7 @@
//公用方法 //公用方法
import { readdir, readFile, rm as removeFile, appendFile } from 'node:fs/promises'; import { readdir, readFile, rm as removeFile, appendFile } from 'node:fs/promises';
import path from 'node:path'; import path from 'node:path';
import { setTimeout } from 'node:timers/promises';
export default { export default {
@ -108,6 +109,10 @@ export default {
} }
return saved; return saved;
},
delay: async function(seconds) {
await setTimeout(seconds * 1000);
} }
}; };

6
spider.mjs

@ -22,7 +22,11 @@ import path from 'node:path';
(async () => { (async () => {
//设置configs为全局变量 //设置configs为全局变量
global.configs = await getConfigs(); let configFile = '';
if (process.argv.length >= 3) {
configFile = process.argv[2];
}
global.configs = await getConfigs(configFile);
const taskMoniter = new TaskMoniter(configs.task_list_dir); const taskMoniter = new TaskMoniter(configs.task_list_dir);
const tajian = new TaJian(configs.data_save_dir); const tajian = new TaJian(configs.data_save_dir);

3
test/cloud_test.mjs

@ -5,7 +5,8 @@ import Hero from '@ulixee/hero';
//const url = 'https://filesite.io'; //const url = 'https://filesite.io';
//const url = 'https://www.google.com'; //const url = 'https://www.google.com';
const url = 'https://v.douyin.com/iJr1NsJJ/'; let url = 'https://v.douyin.com/iJr1NsJJ/';
await hero.goto(url, { await hero.goto(url, {
timeoutMs: 20000, timeoutMs: 20000,
referrer: '-' referrer: '-'

40
test/douyin_test.mjs

@ -1,6 +1,8 @@
import Hero from '@ulixee/hero'; import Hero from '@ulixee/hero';
(async () => { (async () => {
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
const hero = new Hero({ const hero = new Hero({
connectionToCore: 'ws://127.0.0.1:1818', connectionToCore: 'ws://127.0.0.1:1818',
@ -24,7 +26,9 @@ import Hero from '@ulixee/hero';
showChromeAlive: true, showChromeAlive: true,
}); });
const url = 'https://v.douyin.com/i2PBaR5B/'; let url = 'https://v.douyin.com/i2PBaR5B/';
//直播地址测试
url = 'https://v.douyin.com/i2WaMoAN/';
console.log("请求 %s 中。。。", url); console.log("请求 %s 中。。。", url);
await hero.goto(url, { await hero.goto(url, {
timeoutMs: 60000, timeoutMs: 60000,
@ -42,15 +46,17 @@ import Hero from '@ulixee/hero';
//await tab.waitForLoad('AllContentLoaded', {timeoutMs: 30000}); //await tab.waitForLoad('AllContentLoaded', {timeoutMs: 30000});
await tab.waitForLoad('DomContentLoaded', {timeoutMs: 30000}); await tab.waitForLoad('DomContentLoaded', {timeoutMs: 30000});
//await hero.waitForState({ /*
// all(assert) { await hero.waitForState({
// assert( all(assert) {
// hero.detach( hero.document.querySelectorAll('img.poster') ), assert(
// els => els && els.length > 0, hero.detach( hero.document.querySelectorAll('img.poster') ),
// ); els => els && els.length > 0,
// } );
// }, {timeoutMs: 20000}); }
//console.log('poster封面图标签已经准备好'); }, {timeoutMs: 20000});
console.log('poster封面图标签已经准备好');
*/
console.log('加载完成', await hero.isPaintingStable, await hero.isDomContentLoaded, await hero.isAllContentLoaded); console.log('加载完成', await hero.isPaintingStable, await hero.isDomContentLoaded, await hero.isAllContentLoaded);
@ -59,16 +65,26 @@ import Hero from '@ulixee/hero';
let doc_url = await hero.document.location.href; let doc_url = await hero.document.location.href;
console.log('网址', doc_url); console.log('网址', doc_url);
let rnd_secods = parseInt(Math.random() * 20);
console.log("Sleep %s seconds...", rnd_secods);
await delay(1000*rnd_secods); //sleep
//let doc_html = await hero.document.body.innerHTML; //let doc_html = await hero.document.body.innerHTML;
//console.log('网页内容', doc_html); //console.log('网页内容', doc_html);
let title = await hero.document.title; let title = await hero.document.title;
console.log('网页标题', title); console.log('网页标题', title);
const elem = await hero.querySelector('.video-container img.poster'); let elem = await hero.querySelector('.video-container img.poster');
elem = await hero.querySelector('.xgplayer video');
let imgUrl = ''; let imgUrl = '';
imgUrl = await elem.src; //imgUrl = await elem.src;
if (elem) {
imgUrl = await elem.poster;
console.log('post image url: %s', imgUrl); console.log('post image url: %s', imgUrl);
}
//const elems = await hero.detach( hero.document.querySelectorAll('meta') ); //const elems = await hero.detach( hero.document.querySelectorAll('meta') );
//const elems = await hero.document.querySelectorAll('meta'); //const elems = await hero.document.querySelectorAll('meta');

10
test/scrap_test.mjs

@ -24,17 +24,19 @@ import getConfigs from '../config.mjs';
//抖音测试 //抖音测试
url = 'https://v.douyin.com/i2PBaR5B/'; //mob and pc url = 'https://v.douyin.com/i2PBaR5B/'; //mob and pc
configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时 //直播地址测试
url = 'https://v.douyin.com/i2WaMoAN/';
configs.userAgent = configs.userAgents.mac_chrome; configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时
configs.viewport = configs.viewports.pc;
console.log('Hero配置', configs); //configs.userAgent = configs.userAgents.mac_chrome;
//configs.viewport = configs.viewports.pc;
const douyin = new Douyin(heroCloudServer); const douyin = new Douyin(heroCloudServer);
//使用手机模式,默认为pc //使用手机模式,默认为pc
douyin.setMode('mob'); douyin.setMode('mob');
console.log('Hero配置', configs);
console.log('请求中: %s ...', url); console.log('请求中: %s ...', url);
data = await douyin.scrap(url); data = await douyin.scrap(url);

Loading…
Cancel
Save