filesite
1 year ago
6 changed files with 118 additions and 3 deletions
@ -0,0 +1,51 @@
@@ -0,0 +1,51 @@
|
||||
import Hero from '@ulixee/hero'; |
||||
|
||||
/**
 * Reads basic video metadata (cover image URL and title) from a Douyin page
 * by loading it in a Hero browser session and inspecting its <meta> tags.
 */
class Douyin {
    /**
     * @param {string} [heroCloudServer] - Value handed to Hero as
     *   `connectionToCore`. When falsy, Hero is created with empty options.
     */
    constructor(heroCloudServer) {
        this.heroServer = heroCloudServer ? heroCloudServer : '';
    }

    /**
     * Loads `url` and collects the cover/title values found in <meta> tags.
     *
     * Never rejects: any error is logged and whatever was collected so far
     * (possibly an empty object) is returned.
     *
     * @param {string} url - Page address to open.
     * @returns {Promise<{cover?: string, title?: string}>} `cover` comes from
     *   the meta whose name contains `video_cover_image_url`, `title` from the
     *   one whose name contains `video_title`; a key is absent when no such
     *   meta was seen.
     */
    async scrap(url) {
        const data = {};
        // Declared outside the try so the finally block can release it.
        let hero;

        try {
            const options = {};
            if (this.heroServer) {
                options.connectionToCore = this.heroServer;
            }

            hero = new Hero(options);
            // NOTE(review): userAgent is passed as a goto option here; Hero's
            // documentation lists it as a constructor option — confirm that it
            // actually takes effect in this position.
            await hero.goto(url, {
                timeoutMs: 10000,
                referrer: 'https://wechat.com',
                userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
            });

            // Wait for all content to finish loading.
            const tab = await hero.activeTab;
            await tab.waitForLoad('AllContentLoaded', { timeoutMs: 5000 });

            const elems = await hero.detach(hero.document.querySelectorAll('meta'));
            for (const elem of elems) {
                const rawName = elem.getAttribute('name');
                if (!rawName) {
                    continue;
                }
                const metaName = rawName.toLowerCase();
                if (metaName.includes('video_cover_image_url')) {
                    data.cover = elem.getAttribute('content');
                } else if (metaName.includes('video_title')) {
                    data.title = elem.getAttribute('content');
                }
            }
        } catch (error) {
            console.error('ERROR when request url via hero', url, error);
        } finally {
            // Always release the session, including when navigation or
            // parsing failed; previously a failure skipped close() entirely.
            if (hero) {
                try {
                    await hero.close();
                } catch (closeError) {
                    console.error('ERROR when closing hero', url, closeError);
                }
            }
        }

        return data;
    }
}
||||
|
||||
export default Douyin; |
@ -0,0 +1,13 @@
@@ -0,0 +1,13 @@
|
||||
|
||||
## 抖音的域名 |
||||
|
||||
``` |
||||
douyin.com, *.douyin.com, *.iesdouyin.com, *.zijieapi.com, *.bytedance.com, *.yhgfb-cn-static.com, *.usergrowth.com.cn, *.bytescm.com, *.ibytedapm.com, *.bytetos.com, *.douyinpic.com, *.zjcdn.com |
||||
``` |
||||
|
||||
|
||||
来自抖音的安全策略: |
||||
``` |
||||
Content Security Policy directive: "default-src 'self' blob: data: 'unsafe-inline' 'unsafe-eval' *.zijieapi.com *.ibytedapm.com *.bytetos.com *.bytednsdoc.com *.zijieimg.com *.zjurl.cn *.pstatp.com *.bytecdn.cn *.isnssdk.com *.365yg.com *.ipstatp.com *.amemv.com *.ibytedtos.com *.ixigua.com *.ixiguavideo.com *.hypstarcdn.com *.tiktokcdn.com *.topbuzzcdn.com *.muscdn.com *.huoshanzhibo.com *.huoshanxiaoshipin.cn *.huoshanxiaoshipin.net *.huoshanvideo.cn *.huoshanvideo.net *.ieshuodong.cn *.ieshuodong.net *.byteoversea.com *.byted.org *.bytedance.net *.bytescm.com *.bytedance.com *.toutiaocloud.com *.snssdk.com *.toutiao.com *.huoshan.com *.douyin.com *.douyincdn.com *.jinritemai.com *.chengzijianzhan.com *.baike.com *.ribaoapi.com *.bytexservice.com *.pglstatp-toutiao.com *.oceanengine.com *.dyvideotape.com at.alicdn.com g.alicdn.com *.iesdouyin.com *.m.douyin.com *.byteimg.com *.zjcdn.com bytedance: android-webview-video-poster: snssdk1128: *.bytednsdoc.com *.douyinpic.com *.douyinstatic.com *.bdxiguaimg.com *.bdxiguastatic.com *.bytegoofy.com unpkg.com unpkg.byted-static.com *.draftstatic.com *.bytetcc.com |
||||
``` |
||||
|
@ -0,0 +1,38 @@
@@ -0,0 +1,38 @@
|
||||
import Hero from '@ulixee/hero'; |
||||
|
||||
// Manual demo: opens a Douyin share link through a remote Hero core and
// prints the <meta> tags that carry the video cover and title.
(async () => {
    const hero = new Hero({ connectionToCore: 'ws://192.168.3.13:1818' });

    try {
        const url = 'https://v.douyin.com/iJr1NsJJ/';
        console.log("请求 %s 中。。。", url);
        // NOTE(review): userAgent is passed as a goto option here; Hero's
        // documentation lists it as a constructor option — confirm that it
        // actually takes effect in this position.
        await hero.goto(url, {
            timeoutMs: 10000,
            referrer: 'https://wechat.com',
            userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
        });

        //const title = await hero.document.title;
        //console.log("Page title:\n", title);

        // Wait for all content to finish loading.
        const tab = await hero.activeTab;
        await tab.waitForLoad('AllContentLoaded', { timeoutMs: 5000 });
        console.log('加载完成', await hero.isPaintingStable, await hero.isDomContentLoaded, await hero.isAllContentLoaded);

        const elems = await hero.detach(hero.document.querySelectorAll('meta'));
        console.log('数量', elems.length);
        for (const elem of elems) {
            const rawName = elem.getAttribute('name');
            if (!rawName) {
                continue;
            }
            const metaName = rawName.toLowerCase();
            if (metaName.includes('video_cover_image_url') || metaName.includes('video_title')) {
                console.log('meta name %s, content: %s', metaName, elem.getAttribute('content'));
            }
        }
    } catch (error) {
        // Release the remote session before reporting the failure; previously
        // an error went straight to process.exit(1) without closing it.
        try {
            await hero.close();
        } catch (closeError) {
            // Keep the original error as the one that gets reported.
            console.log('ERROR when closing hero', closeError);
        }
        throw error;
    }

    // Success path: a close failure still reaches the handler below.
    await hero.close();
})().catch(error => {
    console.log('ERROR when request url via hero', error);
    process.exit(1);
});
@ -0,0 +1,10 @@
@@ -0,0 +1,10 @@
|
||||
import Douyin from '../bot/Douyin.mjs'; |
||||
|
||||
/**
 * Manual check for the Douyin bot: scrapes one share link through the
 * remote Hero core and prints the parsed result as JSON.
 */
async function main() {
    const scraper = new Douyin('ws://192.168.3.13:1818');
    const shareUrl = 'https://v.douyin.com/iJr1NsJJ/';

    console.log('请求中: %s ...', shareUrl);
    const result = await scraper.scrap(shareUrl);
    const serialized = JSON.stringify(result);
    console.log('抖音网址 %s 解析结果: %s', shareUrl, serialized);
}

main();
Loading…
Reference in new issue