Browse Source

bot lib for douyin done

master
filesite 1 year ago
parent
commit
f978e2a429
  1. 51
      bot/Douyin.mjs
  2. 13
      bypass/douyin.md
  3. 2
      package.json
  4. 7
      test/cloud_test.mjs
  5. 38
      test/douyin_test.mjs
  6. 10
      test/scrap_test.mjs

51
bot/Douyin.mjs

@ -0,0 +1,51 @@
import Hero from '@ulixee/hero';
/**
 * Scraper for Douyin share pages, driven by a Ulixee Hero browser session.
 *
 * It opens the page and reads the cover image URL and the video title from
 * the page's <meta> tags.
 */
class Douyin {
  /**
   * @param {string} [heroCloudServer] - Value handed to Hero as
   *   `connectionToCore`. When omitted (or falsy) Hero is constructed
   *   without that option.
   */
  constructor(heroCloudServer) {
    this.heroServer = heroCloudServer || '';
  }

  /**
   * Loads `url` in a Hero session and extracts video metadata from <meta> tags.
   *
   * This method never rejects: every failure is logged with `console.error`
   * and whatever was collected so far (possibly an empty object) is returned.
   *
   * @param {string} url - Page address to open.
   * @returns {Promise<{cover?: string, title?: string}>} `cover` comes from the
   *   meta tag whose name contains `video_cover_image_url`, `title` from the
   *   one whose name contains `video_title` (names are compared lower-cased).
   */
  async scrap(url) {
    const data = {};
    // Declared outside the try block so the finally clause can release it.
    let hero;

    try {
      const options = {};
      if (this.heroServer) {
        options.connectionToCore = this.heroServer;
      }
      hero = new Hero(options);

      // NOTE(review): `userAgent` looks like a Hero constructor option rather
      // than a goto option, so it may be ignored here — confirm against the
      // Hero docs before relying on the mobile user agent.
      await hero.goto(url, {
        timeoutMs: 10000,
        referrer: 'https://wechat.com',
        userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
      });

      // Wait until all page content has finished loading.
      const tab = await hero.activeTab;
      await tab.waitForLoad('AllContentLoaded', { timeoutMs: 5000 });

      const elems = await hero.detach(hero.document.querySelectorAll('meta'));
      for (const elem of elems) {
        const rawName = elem.getAttribute('name');
        if (!rawName) {
          continue;
        }
        const metaName = rawName.toLowerCase();
        if (metaName.includes('video_cover_image_url')) {
          data.cover = elem.getAttribute('content');
        } else if (metaName.includes('video_title')) {
          data.title = elem.getAttribute('content');
        }
      }
    } catch (error) {
      console.error('ERROR when request url via hero', url, error);
    } finally {
      // Close the session on the failure path too; previously a failed
      // goto/waitForLoad skipped close() and left the session open.
      if (hero) {
        try {
          await hero.close();
        } catch (error) {
          // Keep the "never rejects" contract even if closing fails.
          console.error('ERROR when request url via hero', url, error);
        }
      }
    }

    return data;
  }
}
export default Douyin;

13
bypass/douyin.md

@ -0,0 +1,13 @@
## 抖音的域名
```
douyin.com, *.douyin.com, *.iesdouyin.com, *.zijieapi.com, *.bytedance.com, *.yhgfb-cn-static.com, *.usergrowth.com.cn, *.bytescm.com, *.ibytedapm.com, *.bytetos.com, *.douyinpic.com, *.zjcdn.com
```
来自抖音的安全策略:
```
Content Security Policy directive: "default-src 'self' blob: data: 'unsafe-inline' 'unsafe-eval' *.zijieapi.com *.ibytedapm.com *.bytetos.com *.bytednsdoc.com *.zijieimg.com *.zjurl.cn *.pstatp.com *.bytecdn.cn *.isnssdk.com *.365yg.com *.ipstatp.com *.amemv.com *.ibytedtos.com *.ixigua.com *.ixiguavideo.com *.hypstarcdn.com *.tiktokcdn.com *.topbuzzcdn.com *.muscdn.com *.huoshanzhibo.com *.huoshanxiaoshipin.cn *.huoshanxiaoshipin.net *.huoshanvideo.cn *.huoshanvideo.net *.ieshuodong.cn *.ieshuodong.net *.byteoversea.com *.byted.org *.bytedance.net *.bytescm.com *.bytedance.com *.toutiaocloud.com *.snssdk.com *.toutiao.com *.huoshan.com *.douyin.com *.douyincdn.com *.jinritemai.com *.chengzijianzhan.com *.baike.com *.ribaoapi.com *.bytexservice.com *.pglstatp-toutiao.com *.oceanengine.com *.dyvideotape.com at.alicdn.com g.alicdn.com *.iesdouyin.com *.m.douyin.com *.byteimg.com *.zjcdn.com bytedance: android-webview-video-poster: snssdk1128: *.bytednsdoc.com *.douyinpic.com *.douyinstatic.com *.bdxiguaimg.com *.bdxiguastatic.com *.bytegoofy.com unpkg.com unpkg.byted-static.com *.draftstatic.com *.bytetcc.com
```

2
package.json

@ -1,4 +1,6 @@
{ {
"name": "machege-hero",
"type": "module",
"dependencies": { "dependencies": {
"@ulixee/cloud": "^2.0.0-alpha.24", "@ulixee/cloud": "^2.0.0-alpha.24",
"@ulixee/hero": "^2.0.0-alpha.24" "@ulixee/hero": "^2.0.0-alpha.24"

7
test/cloud_test.mjs

@ -3,15 +3,16 @@ import Hero from '@ulixee/hero';
(async () => { (async () => {
const hero = new Hero({ connectionToCore: 'ws://192.168.3.13:1818' }); const hero = new Hero({ connectionToCore: 'ws://192.168.3.13:1818' });
const url = 'https://filesite.io'; //const url = 'https://filesite.io';
//const url = 'https://www.google.com'; //const url = 'https://www.google.com';
const url = 'https://v.douyin.com/iJr1NsJJ/';
await hero.goto(url, { await hero.goto(url, {
timeoutMs: 120000, timeoutMs: 20000,
referrer: '-' referrer: '-'
}); });
const title = await hero.document.title; const title = await hero.document.title;
console.log('Page title', title); console.log("Page title:\n", title);
await hero.close(); await hero.close();
})().catch(error => { })().catch(error => {

38
test/douyin_test.mjs

@ -0,0 +1,38 @@
import Hero from '@ulixee/hero';
/**
 * Manual check against a Hero cloud server: opens a Douyin share link and
 * prints the meta tags whose name mentions the video cover or video title.
 */
async function inspectDouyinMeta() {
  const browser = new Hero({ connectionToCore: 'ws://192.168.3.13:1818' });
  const target = 'https://v.douyin.com/iJr1NsJJ/';
  console.log("请求 %s 中。。。", target);

  await browser.goto(target, {
    timeoutMs: 10000,
    referrer: 'https://wechat.com',
    userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
  });

  // Wait until all page content has finished loading.
  const currentTab = await browser.activeTab;
  await currentTab.waitForLoad('AllContentLoaded', { timeoutMs: 5000 });

  // Read the three load-state flags in the same order they are printed.
  const paintingStable = await browser.isPaintingStable;
  const domContentLoaded = await browser.isDomContentLoaded;
  const allContentLoaded = await browser.isAllContentLoaded;
  console.log('加载完成', paintingStable, domContentLoaded, allContentLoaded);

  const metaTags = await browser.detach(browser.document.querySelectorAll('meta'));
  console.log('数量', metaTags.length);

  // Substrings of the (lower-cased) meta name that are worth printing.
  const wantedFragments = ['video_cover_image_url', 'video_title'];
  for (const tag of metaTags) {
    const rawName = tag.getAttribute('name');
    if (!rawName) {
      continue;
    }
    const loweredName = rawName.toLowerCase();
    if (wantedFragments.some((fragment) => loweredName.includes(fragment))) {
      console.log('meta name %s, content: %s', loweredName, tag.getAttribute('content'));
    }
  }

  await browser.close();
}

inspectDouyinMeta().catch((error) => {
  console.log('ERROR when request url via hero', error);
  process.exit(1);
});

10
test/scrap_test.mjs

@ -0,0 +1,10 @@
import Douyin from '../bot/Douyin.mjs';
/**
 * Manual check of the Douyin bot: scrapes one share link through the Hero
 * cloud server and prints the parsed result as JSON.
 */
async function runScrapDemo() {
  const douyin = new Douyin('ws://192.168.3.13:1818');
  const url = 'https://v.douyin.com/iJr1NsJJ/';

  console.log('请求中: %s ...', url);
  const parsed = await douyin.scrap(url);
  const serialized = JSON.stringify(parsed);
  console.log('抖音网址 %s 解析结果: %s', url, serialized);
}

runScrapDemo();
Loading…
Cancel
Save