filesite
1 year ago
6 changed files with 118 additions and 3 deletions
@ -0,0 +1,51 @@ |
|||||||
|
import Hero from '@ulixee/hero'; |
||||||
|
|
||||||
|
/**
 * Scraper that opens a Douyin page in a Ulixee Hero browser session and
 * extracts the video cover and title from the page's <meta> tags.
 */
class Douyin {
  /**
   * @param {string} [heroCloudServer] - Value forwarded to Hero as
   *   `connectionToCore`. When omitted or falsy, Hero is created with an
   *   empty options object.
   */
  constructor(heroCloudServer) {
    this.heroServer = heroCloudServer ? heroCloudServer : '';
  }

  /**
   * Loads `url` and collects metadata from its <meta name="..."> tags.
   *
   * A meta tag whose lower-cased name contains `video_cover_image_url` fills
   * `cover`; one containing `video_title` fills `title`.
   *
   * @param {string} url - Page address to open.
   * @returns {Promise<{cover?: string, title?: string}>} Whatever was
   *   collected. This method does not reject: failures are logged with
   *   `console.error` and the (possibly empty) object is still returned.
   */
  async scrap(url) {
    const data = {};
    // Declared outside the try block so the finally clause can release it.
    let hero = null;

    try {
      const options = {};
      if (this.heroServer) {
        options.connectionToCore = this.heroServer;
      }

      hero = new Hero(options);

      // NOTE(review): Hero's documented goto options appear to be only
      // `timeoutMs` and `referrer`; `userAgent` is documented as a Hero
      // constructor option. Confirm whether this key has any effect here.
      await hero.goto(url, {
        timeoutMs: 10000,
        referrer: 'https://wechat.com',
        userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
      });

      // Wait until all page content has finished loading.
      const tab = await hero.activeTab;
      await tab.waitForLoad('AllContentLoaded', { timeoutMs: 5000 });

      // Detach the nodes so their attributes can be read locally.
      const elems = await hero.detach(hero.document.querySelectorAll('meta'));
      for (const elem of elems) {
        const rawName = elem.getAttribute('name');
        if (!rawName) {
          continue;
        }

        const metaName = rawName.toLowerCase();
        if (metaName.indexOf('video_cover_image_url') > -1) {
          data.cover = elem.getAttribute('content');
        } else if (metaName.indexOf('video_title') > -1) {
          data.title = elem.getAttribute('content');
        }
      }
    } catch (error) {
      console.error('ERROR when request url via hero', url, error);
    } finally {
      // Always release the browser session, including when navigation or
      // parsing failed; otherwise the session stays open after an error.
      if (hero) {
        try {
          await hero.close();
        } catch (closeError) {
          // Keep the "never rejects" contract even if closing fails.
          console.error('ERROR when closing hero session', url, closeError);
        }
      }
    }

    return data;
  }
}
||||||
|
|
||||||
|
export default Douyin; |
@ -0,0 +1,13 @@ |
|||||||
|
|
||||||
|
## 抖音的域名 |
||||||
|
|
||||||
|
``` |
||||||
|
douyin.com, *.douyin.com, *.iesdouyin.com, *.zijieapi.com, *.bytedance.com, *.yhgfb-cn-static.com, *.usergrowth.com.cn, *.bytescm.com, *.ibytedapm.com, *.bytetos.com, *.douyinpic.com, *.zjcdn.com |
||||||
|
``` |
||||||
|
|
||||||
|
|
||||||
|
来自抖音的内容安全策略(Content Security Policy):
||||||
|
``` |
||||||
|
Content Security Policy directive: "default-src 'self' blob: data: 'unsafe-inline' 'unsafe-eval' *.zijieapi.com *.ibytedapm.com *.bytetos.com *.bytednsdoc.com *.zijieimg.com *.zjurl.cn *.pstatp.com *.bytecdn.cn *.isnssdk.com *.365yg.com *.ipstatp.com *.amemv.com *.ibytedtos.com *.ixigua.com *.ixiguavideo.com *.hypstarcdn.com *.tiktokcdn.com *.topbuzzcdn.com *.muscdn.com *.huoshanzhibo.com *.huoshanxiaoshipin.cn *.huoshanxiaoshipin.net *.huoshanvideo.cn *.huoshanvideo.net *.ieshuodong.cn *.ieshuodong.net *.byteoversea.com *.byted.org *.bytedance.net *.bytescm.com *.bytedance.com *.toutiaocloud.com *.snssdk.com *.toutiao.com *.huoshan.com *.douyin.com *.douyincdn.com *.jinritemai.com *.chengzijianzhan.com *.baike.com *.ribaoapi.com *.bytexservice.com *.pglstatp-toutiao.com *.oceanengine.com *.dyvideotape.com at.alicdn.com g.alicdn.com *.iesdouyin.com *.m.douyin.com *.byteimg.com *.zjcdn.com bytedance: android-webview-video-poster: snssdk1128: *.bytednsdoc.com *.douyinpic.com *.douyinstatic.com *.bdxiguaimg.com *.bdxiguastatic.com *.bytegoofy.com unpkg.com unpkg.byted-static.com *.draftstatic.com *.bytetcc.com |
||||||
|
``` |
||||||
|
|
@ -0,0 +1,38 @@ |
|||||||
|
import Hero from '@ulixee/hero'; |
||||||
|
|
||||||
|
// Manual probe: opens a Douyin short link through a remote Hero core and
// prints the <meta> tags that carry the video cover and title.
(async () => {
  const hero = new Hero({ connectionToCore: 'ws://192.168.3.13:1818' });

  try {
    const url = 'https://v.douyin.com/iJr1NsJJ/';
    console.log("请求 %s 中。。。", url);

    // NOTE(review): Hero's documented goto options appear to be only
    // `timeoutMs` and `referrer`; `userAgent` is documented as a Hero
    // constructor option. Confirm whether this key has any effect here.
    await hero.goto(url, {
      timeoutMs: 10000,
      referrer: 'https://wechat.com',
      userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
    });

    //const title = await hero.document.title;
    //console.log("Page title:\n", title);

    // Wait until all page content has finished loading.
    const tab = await hero.activeTab;
    await tab.waitForLoad('AllContentLoaded', { timeoutMs: 5000 });
    console.log('加载完成', await hero.isPaintingStable, await hero.isDomContentLoaded, await hero.isAllContentLoaded);

    // Detach the nodes so their attributes can be read locally.
    const elems = await hero.detach(hero.document.querySelectorAll('meta'));
    console.log('数量', elems.length);

    for (const elem of elems) {
      const rawName = elem.getAttribute('name');
      if (!rawName) {
        continue;
      }

      const metaName = rawName.toLowerCase();
      if (metaName.indexOf('video_cover_image_url') > -1 || metaName.indexOf('video_title') > -1) {
        console.log('meta name %s, content: %s', metaName, elem.getAttribute('content'));
      }
    }
  } finally {
    // Close the session on failure too, before the catch handler below
    // terminates the process.
    await hero.close();
  }
})().catch(error => {
  console.log('ERROR when request url via hero', error);
  process.exit(1);
});
@ -0,0 +1,10 @@ |
|||||||
|
import Douyin from '../bot/Douyin.mjs'; |
||||||
|
|
||||||
|
// Smoke run: scrape one Douyin short link through the Douyin class and print
// the parsed result as JSON.
(async function runDouyinSmoke() {
  // Address handed to the Douyin constructor.
  const coreAddress = 'ws://192.168.3.13:1818';
  const scraper = new Douyin(coreAddress);

  const targetUrl = 'https://v.douyin.com/iJr1NsJJ/';
  console.log('请求中: %s ...', targetUrl);

  const parsed = await scraper.scrap(targetUrl);
  const serialized = JSON.stringify(parsed);
  console.log('抖音网址 %s 解析结果: %s', targetUrl, serialized);
})();
Loading…
Reference in new issue