Browse Source

cover image save done

master
filesite 1 year ago
parent
commit
36d9fd8778
  1. 4
      .gitignore
  2. 14
      bot/Bilibili.mjs
  3. 17
      bot/Douyin.mjs
  4. 14
      bot/Kuaishou.mjs
  5. 14
      bot/Xigua.mjs
  6. 16
      lib/common.mjs
  7. 16
      lib/tajian.mjs
  8. 6
      spider.mjs

4
.gitignore vendored

@ -1,8 +1,10 @@
package-lock.json package-lock.json
node_modules/ node_modules/
tmp/profile*.json tmp/profile*.json
todo/*.todo test/*.task
todo/*.task
data/*.url data/*.url
data/*.jpg data/*.jpg
data/*.jpeg
data/*.png data/*.png
data/*.txt data/*.txt

14
bot/Bilibili.mjs

@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs'; import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs'; import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Bilibili extends HeroBot { class Bilibili extends HeroBot {
@ -57,6 +58,19 @@ class Bilibili extends HeroBot {
} }
} }
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close(); await hero.close();
data.bot = this.name; data.bot = this.name;

17
bot/Douyin.mjs

@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs'; import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs'; import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Douyin extends HeroBot { class Douyin extends HeroBot {
@ -58,6 +59,22 @@ class Douyin extends HeroBot {
} }
} }
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
//等待所有内容加载完成
const tab_img = await hero.activeTab;
await tab_img.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close(); await hero.close();
data.done = true; data.done = true;

14
bot/Kuaishou.mjs

@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs'; import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs'; import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Kuaishou extends HeroBot { class Kuaishou extends HeroBot {
@ -41,6 +42,19 @@ class Kuaishou extends HeroBot {
data.cover = await elem.getAttribute('poster'); data.cover = await elem.getAttribute('poster');
} }
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close(); await hero.close();
data.bot = this.name; data.bot = this.name;

14
bot/Xigua.mjs

@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs'; import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs'; import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs'; import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Xigua extends HeroBot { class Xigua extends HeroBot {
@ -50,6 +51,19 @@ class Xigua extends HeroBot {
} }
} }
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close(); await hero.close();
data.bot = this.name; data.bot = this.name;

16
lib/common.mjs

@ -45,4 +45,20 @@ export default {
return url; return url;
}, },
getImageType: function(url) {
let imgType = 'jpeg';
if (/\.jpe?g/ig.test(url)) {
imgType = 'jpeg';
}else if (/\.png/ig.test(url)) {
imgType = 'png';
}else if (/\.webp?/ig.test(url)) {
imgType = 'webp';
}else if (/\.gif?/ig.test(url)) {
imgType = 'gif';
}
return imgType;
},
}; };

16
lib/tajian.mjs

@ -19,7 +19,7 @@ class TaJian {
URL=https://microsoft.com/ URL=https://microsoft.com/
*/ */
async saveUrlShortcut(filename, data) { async saveUrlShortcut(filename, data) {
console.log('TaJian try to save shortcut url', data); //console.log('TaJian try to save shortcut url');
try { try {
@ -39,7 +39,7 @@ URL=${data.url}
} }
async saveDescriptionFiles(filename, data) { async saveDescriptionFiles(filename, data) {
console.log('TaJian try to save description files', data); //console.log('TaJian try to save description files');
try { try {
const dirPath = path.resolve(this.save_dir); const dirPath = path.resolve(this.save_dir);
@ -49,9 +49,19 @@ URL=${data.url}
let content = data.title; let content = data.title;
await writeFile(filepath, content, { encoding: 'utf8' }); await writeFile(filepath, content, { encoding: 'utf8' });
if (typeof(data.cover_base64) != 'undefined' && data.cover_base64) {
filepath = `${dirPath}/${filename}.${data.cover_type}`;
content = Buffer.from(data.cover_base64, "base64"); //保存图片文件
await writeFile(filepath, content, { encoding: 'utf8' });
filepath = `${dirPath}/${filename}_cover.txt`; filepath = `${dirPath}/${filename}_cover.txt`;
content = data.cover; content = `${filename}.${data.cover_type}`; //保存图片路径
await writeFile(filepath, content, { encoding: 'utf8' }); await writeFile(filepath, content, { encoding: 'utf8' });
}else {
filepath = `${dirPath}/${filename}_cover.txt`;
content = data.cover; //保存图片网址
await writeFile(filepath, content, { encoding: 'utf8' });
}
}catch(error) { }catch(error) {
console.error('Save description files failed: %s', error); console.error('Save description files failed: %s', error);
return false; return false;

6
spider.mjs

@ -56,13 +56,9 @@ import cron from 'node-cron';
taskMoniter.setTaskRunning(task.id); taskMoniter.setTaskRunning(task.id);
const data = await bot.scrap(task.url); const data = await bot.scrap(task.url);
console.log('Data got by bot', data); //console.log('Data got by bot', data);
if (typeof(data.done) != 'undefined' && data.done == true) { if (typeof(data.done) != 'undefined' && data.done == true) {
if (data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
}
if ( if (
await tajian.saveUrlShortcut(task.id, data) await tajian.saveUrlShortcut(task.id, data)
&& await tajian.saveDescriptionFiles(task.id, data) && await tajian.saveDescriptionFiles(task.id, data)

Loading…
Cancel
Save