Browse Source

douyin livestream support

master
filesite 8 months ago
parent
commit
70bf6ebf25
  1. 30
      bot/Douyin.mjs
  2. 13
      config.mjs
  3. 5
      lib/common.mjs
  4. 6
      spider.mjs
  5. 27
      test/cloud_test.mjs
  6. 44
      test/douyin_test.mjs
  7. 10
      test/scrap_test.mjs

30
bot/Douyin.mjs

@ -59,22 +59,38 @@ class Douyin extends HeroBot { @@ -59,22 +59,38 @@ class Douyin extends HeroBot {
}, {timeoutMs: configs.heroTabOptions.timeoutMs});
}
let rnd_secods = 10 + parseInt(Math.random() * 10);
console.log("Sleep %s seconds...", rnd_secods);
await common.delay(rnd_secods);
//解析网页HTML数据
data.title = await hero.document.title;
if (this.ua == 'mob') {
//手机版网页解析
const imgElem = await hero.querySelector('.video-container img.poster');
let imgElem = await hero.querySelector('.video-container img.poster');
let elType = 'image';
if (!imgElem) {
console.error('HTML解析出错,找不到封面图', data);
await hero.close();
//删除profile文件后重试
await this.deleteProfile();
return false;
//尝试去抓取video的poster属性
imgElem = await hero.querySelector('.xgplayer video');
elType = 'video';
if (!imgElem) {
//尝试获取用户头像作为封面图,兼容直播页面
imgElem = await hero.querySelector('.avatar-component-avatar-container img');
elType = 'image';
if (!imgElem) {
console.error('HTML解析出错,找不到封面图', data);
await hero.close();
//删除profile文件后重试
await this.deleteProfile();
return false;
}
}
}
data.cover = await imgElem.src;
data.cover = elType == 'image' ? await imgElem.src : await imgElem.poster;
}else {
//pc版网页解析
const elems = await hero.document.querySelectorAll('meta');

13
config.mjs

@ -38,10 +38,10 @@ let configs = { @@ -38,10 +38,10 @@ let configs = {
//可选项参考官方文档:https://ulixee.org/docs/hero/basic-client/hero
botOptions: {
showChrome: false,
showChromeInteractions: false,
showDevtools: false,
showChromeAlive: false,
showChrome: true,
showChromeInteractions: true,
showDevtools: true,
showChromeAlive: true,
},
viewports: {
@ -76,10 +76,9 @@ let configs = { @@ -76,10 +76,9 @@ let configs = {
};
async function getConfig() {
async function getConfig(configFile) {
//自定义JSON格式配置文件支持
if (process.argv.length >= 3) {
let configFile = process.argv[2];
if (typeof(configFile) != 'undefined' && configFile) {
let myConfigs = await common.loadCustomizeConfig(configFile);
if (myConfigs) {
configs = common.mergeConfigs(myConfigs, configs);

5
lib/common.mjs

@ -1,6 +1,7 @@ @@ -1,6 +1,7 @@
//公用方法
import { readdir, readFile, rm as removeFile, appendFile } from 'node:fs/promises';
import path from 'node:path';
import { setTimeout } from 'node:timers/promises';
export default {
@ -108,6 +109,10 @@ export default { @@ -108,6 +109,10 @@ export default {
}
return saved;
},
delay: async function(seconds) {
await setTimeout(seconds * 1000);
}
};

6
spider.mjs

@ -22,7 +22,11 @@ import path from 'node:path'; @@ -22,7 +22,11 @@ import path from 'node:path';
(async () => {
//设置configs为全局变量
global.configs = await getConfigs();
let configFile = '';
if (process.argv.length >= 3) {
configFile = process.argv[2];
}
global.configs = await getConfigs(configFile);
const taskMoniter = new TaskMoniter(configs.task_list_dir);
const tajian = new TaJian(configs.data_save_dir);

27
test/cloud_test.mjs

@ -1,21 +1,22 @@ @@ -1,21 +1,22 @@
import Hero from '@ulixee/hero';
(async () => {
const hero = new Hero({ connectionToCore: 'ws://127.0.0.1:1818' });
const hero = new Hero({ connectionToCore: 'ws://127.0.0.1:1818' });
//const url = 'https://filesite.io';
//const url = 'https://www.google.com';
const url = 'https://v.douyin.com/iJr1NsJJ/';
await hero.goto(url, {
timeoutMs: 20000,
referrer: '-'
});
//const url = 'https://filesite.io';
//const url = 'https://www.google.com';
let url = 'https://v.douyin.com/iJr1NsJJ/';
const title = await hero.document.title;
console.log("Page title:\n", title);
await hero.goto(url, {
timeoutMs: 20000,
referrer: '-'
});
await hero.close();
const title = await hero.document.title;
console.log("Page title:\n", title);
await hero.close();
})().catch(error => {
console.error("Error got:\n%s", error);
process.exit(1);
console.error("Error got:\n%s", error);
process.exit(1);
});

44
test/douyin_test.mjs

@ -1,6 +1,8 @@ @@ -1,6 +1,8 @@
import Hero from '@ulixee/hero';
(async () => {
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
const hero = new Hero({
connectionToCore: 'ws://127.0.0.1:1818',
@ -24,7 +26,9 @@ import Hero from '@ulixee/hero'; @@ -24,7 +26,9 @@ import Hero from '@ulixee/hero';
showChromeAlive: true,
});
const url = 'https://v.douyin.com/i2PBaR5B/';
let url = 'https://v.douyin.com/i2PBaR5B/';
//直播地址测试
url = 'https://v.douyin.com/i2WaMoAN/';
console.log("请求 %s 中。。。", url);
await hero.goto(url, {
timeoutMs: 60000,
@ -42,15 +46,17 @@ import Hero from '@ulixee/hero'; @@ -42,15 +46,17 @@ import Hero from '@ulixee/hero';
//await tab.waitForLoad('AllContentLoaded', {timeoutMs: 30000});
await tab.waitForLoad('DomContentLoaded', {timeoutMs: 30000});
//await hero.waitForState({
// all(assert) {
// assert(
// hero.detach( hero.document.querySelectorAll('img.poster') ),
// els => els && els.length > 0,
// );
// }
// }, {timeoutMs: 20000});
//console.log('poster封面图标签已经准备好');
/*
await hero.waitForState({
all(assert) {
assert(
hero.detach( hero.document.querySelectorAll('img.poster') ),
els => els && els.length > 0,
);
}
}, {timeoutMs: 20000});
console.log('poster封面图标签已经准备好');
*/
console.log('加载完成', await hero.isPaintingStable, await hero.isDomContentLoaded, await hero.isAllContentLoaded);
@ -59,16 +65,26 @@ import Hero from '@ulixee/hero'; @@ -59,16 +65,26 @@ import Hero from '@ulixee/hero';
let doc_url = await hero.document.location.href;
console.log('网址', doc_url);
let rnd_secods = parseInt(Math.random() * 20);
console.log("Sleep %s seconds...", rnd_secods);
await delay(1000*rnd_secods); //sleep
//let doc_html = await hero.document.body.innerHTML;
//console.log('网页内容', doc_html);
let title = await hero.document.title;
console.log('网页标题', title);
const elem = await hero.querySelector('.video-container img.poster');
let elem = await hero.querySelector('.video-container img.poster');
elem = await hero.querySelector('.xgplayer video');
let imgUrl = '';
imgUrl = await elem.src;
console.log('post image url: %s', imgUrl);
//imgUrl = await elem.src;
if (elem) {
imgUrl = await elem.poster;
console.log('post image url: %s', imgUrl);
}
//const elems = await hero.detach( hero.document.querySelectorAll('meta') );
//const elems = await hero.document.querySelectorAll('meta');
@ -85,7 +101,7 @@ import Hero from '@ulixee/hero'; @@ -85,7 +101,7 @@ import Hero from '@ulixee/hero';
// }
//}
await hero.close();
await hero.close();
})().catch(error => {
console.error("Error got:\n%s", error);
process.exit(1);

10
test/scrap_test.mjs

@ -24,17 +24,19 @@ import getConfigs from '../config.mjs'; @@ -24,17 +24,19 @@ import getConfigs from '../config.mjs';
//抖音测试
url = 'https://v.douyin.com/i2PBaR5B/'; //mob and pc
configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时
//直播地址测试
url = 'https://v.douyin.com/i2WaMoAN/';
configs.userAgent = configs.userAgents.mac_chrome;
configs.viewport = configs.viewports.pc;
configs.heroTabOptions.timeoutMs = 20000; //所有内容加载完成超时
console.log('Hero配置', configs);
//configs.userAgent = configs.userAgents.mac_chrome;
//configs.viewport = configs.viewports.pc;
const douyin = new Douyin(heroCloudServer);
//使用手机模式,默认为pc
douyin.setMode('mob');
console.log('Hero配置', configs);
console.log('请求中: %s ...', url);
data = await douyin.scrap(url);

Loading…
Cancel
Save