diff --git a/.gitignore b/.gitignore index bf2b2ed..a16a9d1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ package-lock.json node_modules/ tmp/profile*.json -todo/*.todo +test/*.task +todo/*.task data/*.url data/*.jpg +data/*.jpeg data/*.png data/*.txt diff --git a/bot/Bilibili.mjs b/bot/Bilibili.mjs index 780998c..984a3bc 100644 --- a/bot/Bilibili.mjs +++ b/bot/Bilibili.mjs @@ -2,6 +2,7 @@ import Hero from '@ulixee/hero'; import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; +import common from '../lib/common.mjs'; class Bilibili extends HeroBot { @@ -57,6 +58,19 @@ class Bilibili extends HeroBot { } } + //get cover image's base64 data + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + + const response = await hero.goto(data.cover); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } + } + await hero.close(); data.bot = this.name; diff --git a/bot/Douyin.mjs b/bot/Douyin.mjs index 82e4849..85462a4 100644 --- a/bot/Douyin.mjs +++ b/bot/Douyin.mjs @@ -2,6 +2,7 @@ import Hero from '@ulixee/hero'; import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; +import common from '../lib/common.mjs'; class Douyin extends HeroBot { @@ -58,6 +59,22 @@ class Douyin extends HeroBot { } } + //get cover image's base64 data + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + + const response = await hero.goto(data.cover); + //等待所有内容加载完成 + const tab_img = await hero.activeTab; + await tab_img.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs}); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } + } + await hero.close(); data.done = true; diff --git a/bot/Kuaishou.mjs b/bot/Kuaishou.mjs index 36aec49..fa3cd44 100644 --- a/bot/Kuaishou.mjs +++ b/bot/Kuaishou.mjs @@ -2,6 +2,7 @@ import Hero from '@ulixee/hero'; import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; +import common from '../lib/common.mjs'; class Kuaishou extends HeroBot { @@ -41,6 +42,19 @@ class Kuaishou extends HeroBot { data.cover = await elem.getAttribute('poster'); } + //get cover image's base64 data + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + + const response = await hero.goto(data.cover); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } + } + await hero.close(); data.bot = this.name; diff --git a/bot/Xigua.mjs b/bot/Xigua.mjs index 19e3500..5536862 100644 --- a/bot/Xigua.mjs +++ b/bot/Xigua.mjs @@ -2,6 +2,7 @@ import Hero from '@ulixee/hero'; import configs from '../config.mjs'; import HeroBot from './HeroBot.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; +import common from '../lib/common.mjs'; class Xigua extends HeroBot { @@ -50,6 +51,19 @@ class Xigua extends HeroBot { } } + //get cover image's base64 data + if (typeof(data.cover) != 'undefined' && data.cover) { + data.cover = common.getAbsoluteUrl(data.cover); + + const response = await hero.goto(data.cover); + const imgBuffer = await response.buffer; + //console.log('Cover image fetch done', imgBuffer.toString('base64')); + if (imgBuffer) { + data.cover_base64 = imgBuffer.toString('base64'); + data.cover_type = common.getImageType(data.cover); + } + } + await hero.close(); data.bot = this.name; diff --git a/lib/common.mjs b/lib/common.mjs index 3a96960..c7ff63b 100644 --- a/lib/common.mjs +++ b/lib/common.mjs @@ -45,4 +45,20 @@ export default { return url; }, + getImageType: function(url) { + let imgType = 'jpeg'; + + if (/\.jpe?g/ig.test(url)) { + imgType = 'jpeg'; + }else if (/\.png/ig.test(url)) { + imgType = 'png'; + }else if (/\.webp?/ig.test(url)) { + imgType = 'webp'; + }else if (/\.gif?/ig.test(url)) { + imgType = 'gif'; + } + + return imgType; + }, + }; diff --git a/lib/tajian.mjs b/lib/tajian.mjs index 7371c10..385a702 100644 --- a/lib/tajian.mjs +++ b/lib/tajian.mjs @@ -19,7 +19,7 @@ class TaJian { URL=https://microsoft.com/ */ async saveUrlShortcut(filename, data) { - console.log('TaJian try to save shortcut url', data); + //console.log('TaJian try to save shortcut url'); try { @@ -39,7 +39,7 @@ URL=${data.url} } async saveDescriptionFiles(filename, data) { - console.log('TaJian try to save description files', data); + //console.log('TaJian try to save description files'); try { const dirPath = path.resolve(this.save_dir); @@ -49,9 +49,19 @@ URL=${data.url} let content = data.title; await writeFile(filepath, content, { encoding: 'utf8' }); - filepath = `${dirPath}/${filename}_cover.txt`; - content = data.cover; - await writeFile(filepath, content, { encoding: 'utf8' }); + if (typeof(data.cover_base64) != 'undefined' && data.cover_base64) { + filepath = `${dirPath}/${filename}.${data.cover_type}`; + content = Buffer.from(data.cover_base64, "base64"); //保存图片文件 + await writeFile(filepath, content, { encoding: 'utf8' }); + + filepath = `${dirPath}/${filename}_cover.txt`; + content = `${filename}.${data.cover_type}`; //保存图片路径 + await writeFile(filepath, content, { encoding: 'utf8' }); + }else { + filepath = `${dirPath}/${filename}_cover.txt`; + content = data.cover; //保存图片网址 + await writeFile(filepath, content, { encoding: 'utf8' }); + } }catch(error) { console.error('Save description files failed: %s', error); return false; diff --git a/spider.mjs b/spider.mjs index 93e252b..87e3363 100644 --- a/spider.mjs +++ b/spider.mjs @@ -56,13 +56,9 @@ import cron from 'node-cron'; taskMoniter.setTaskRunning(task.id); const data = await bot.scrap(task.url); - console.log('Data got by bot', data); + //console.log('Data got by bot', data); if (typeof(data.done) != 'undefined' && data.done == true) { - if (data.cover) { - data.cover = common.getAbsoluteUrl(data.cover); - } - if ( await tajian.saveUrlShortcut(task.id, data) && await tajian.saveDescriptionFiles(task.id, data)