Browse Source

cover image save done

master
filesite 1 year ago
parent
commit
36d9fd8778
  1. 4
      .gitignore
  2. 14
      bot/Bilibili.mjs
  3. 17
      bot/Douyin.mjs
  4. 14
      bot/Kuaishou.mjs
  5. 14
      bot/Xigua.mjs
  6. 16
      lib/common.mjs
  7. 20
      lib/tajian.mjs
  8. 6
      spider.mjs

4
.gitignore vendored

@@ -1,8 +1,10 @@
package-lock.json
node_modules/
tmp/profile*.json
todo/*.todo
test/*.task
todo/*.task
data/*.url
data/*.jpg
data/*.jpeg
data/*.png
data/*.txt

14
bot/Bilibili.mjs

@@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Bilibili extends HeroBot {
@@ -57,6 +58,19 @@ class Bilibili extends HeroBot {
}
}
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close();
data.bot = this.name;

17
bot/Douyin.mjs

@@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Douyin extends HeroBot {
@@ -58,6 +59,22 @@ class Douyin extends HeroBot {
}
}
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
//等待所有内容加载完成
const tab_img = await hero.activeTab;
await tab_img.waitForLoad('AllContentLoaded', {timeoutMs: configs.heroTabOptions.timeoutMs});
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close();
data.done = true;

14
bot/Kuaishou.mjs

@@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Kuaishou extends HeroBot {
@@ -41,6 +42,19 @@ class Kuaishou extends HeroBot {
data.cover = await elem.getAttribute('poster');
}
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close();
data.bot = this.name;

14
bot/Xigua.mjs

@@ -2,6 +2,7 @@ import Hero from '@ulixee/hero';
import configs from '../config.mjs';
import HeroBot from './HeroBot.mjs';
import ClientLogPlugin from '../plugin/ClientLogPlugin.mjs';
import common from '../lib/common.mjs';
class Xigua extends HeroBot {
@@ -50,6 +51,19 @@ class Xigua extends HeroBot {
}
}
//get cover image's base64 data
if (typeof(data.cover) != 'undefined' && data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
const response = await hero.goto(data.cover);
const imgBuffer = await response.buffer;
//console.log('Cover image fetch done', imgBuffer.toString('base64'));
if (imgBuffer) {
data.cover_base64 = imgBuffer.toString('base64');
data.cover_type = common.getImageType(data.cover);
}
}
await hero.close();
data.bot = this.name;

16
lib/common.mjs

@@ -45,4 +45,20 @@ export default {
return url;
},
getImageType: function(url) {
let imgType = 'jpeg';
if (/\.jpe?g/ig.test(url)) {
imgType = 'jpeg';
}else if (/\.png/ig.test(url)) {
imgType = 'png';
}else if (/\.webp?/ig.test(url)) {
imgType = 'webp';
}else if (/\.gif?/ig.test(url)) {
imgType = 'gif';
}
return imgType;
},
};

20
lib/tajian.mjs

@@ -19,7 +19,7 @@ class TaJian {
URL=https://microsoft.com/
*/
async saveUrlShortcut(filename, data) {
console.log('TaJian try to save shortcut url', data);
//console.log('TaJian try to save shortcut url');
try {
@@ -39,7 +39,7 @@ URL=${data.url}
}
async saveDescriptionFiles(filename, data) {
console.log('TaJian try to save description files', data);
//console.log('TaJian try to save description files');
try {
const dirPath = path.resolve(this.save_dir);
@@ -49,9 +49,19 @@ URL=${data.url}
let content = data.title;
await writeFile(filepath, content, { encoding: 'utf8' });
filepath = `${dirPath}/${filename}_cover.txt`;
content = data.cover;
await writeFile(filepath, content, { encoding: 'utf8' });
if (typeof(data.cover_base64) != 'undefined' && data.cover_base64) {
filepath = `${dirPath}/${filename}.${data.cover_type}`;
content = Buffer.from(data.cover_base64, "base64"); //保存图片文件
await writeFile(filepath, content, { encoding: 'utf8' });
filepath = `${dirPath}/${filename}_cover.txt`;
content = `${filename}.${data.cover_type}`; //保存图片路径
await writeFile(filepath, content, { encoding: 'utf8' });
}else {
filepath = `${dirPath}/${filename}_cover.txt`;
content = data.cover; //保存图片网址
await writeFile(filepath, content, { encoding: 'utf8' });
}
}catch(error) {
console.error('Save description files failed: %s', error);
return false;

6
spider.mjs

@@ -56,13 +56,9 @@ import cron from 'node-cron';
taskMoniter.setTaskRunning(task.id);
const data = await bot.scrap(task.url);
console.log('Data got by bot', data);
//console.log('Data got by bot', data);
if (typeof(data.done) != 'undefined' && data.done == true) {
if (data.cover) {
data.cover = common.getAbsoluteUrl(data.cover);
}
if (
await tajian.saveUrlShortcut(task.id, data)
&& await tajian.saveDescriptionFiles(task.id, data)

Loading…
Cancel
Save