Browse Source

tajian data save done

master
filesite 1 year ago
parent
commit
cd3297ef3a
  1. 2
      config.mjs
  2. 8
      lib/common.mjs
  3. 50
      lib/tajian.mjs
  4. 14
      spider.mjs
  5. 25
      test/tajia_test.mjs

2
config.mjs

@ -1,7 +1,7 @@
export default { export default {
//自动任务相关配置 //自动任务相关配置
task_list_dir: 'todo/', //待抓取任务文件保存目录 task_list_dir: 'todo/', //待抓取任务文件保存目录
task_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io data_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io
//bot相关配置 //bot相关配置

8
lib/common.mjs

@ -37,4 +37,12 @@ export default {
return botName; return botName;
}, },
getAbsoluteUrl: function(url) {
if (/^\/\//.test(url)) {
url = `https:${url}`;
}
return url;
},
}; };

50
lib/tajian.mjs

@ -2,6 +2,10 @@
* 基于Bot跟Machete的TaJian对接 * 基于Bot跟Machete的TaJian对接
* 按filesite.io标准把Bot爬虫返回的数据格式化保存为.url及其说明文件 * 按filesite.io标准把Bot爬虫返回的数据格式化保存为.url及其说明文件
*/ */
import common from './common.mjs';
import fs from 'node:fs';
import { writeFile } from 'node:fs/promises';
import path from 'node:path';
class TaJian { class TaJian {
@ -9,9 +13,51 @@ class TaJian {
this.save_dir = data_save_dir; this.save_dir = data_save_dir;
} }
async saveUrlShortcut(data) { /*
console.log('TaJian try to save data', data); * Example:
[InternetShortcut]
URL=https://microsoft.com/
*/
async saveUrlShortcut(filename, data) {
console.log('TaJian try to save shortcut url', data);
try {
const dirPath = path.resolve(this.save_dir);
const filepath = `${dirPath}/${filename}.url`;
const shortUrlContent = `\[InternetShortcut\]
URL=${data.url}
`;
await writeFile(filepath, shortUrlContent, { encoding: 'utf8' });
}catch(error) {
console.error('Save short url file failed: %s', error);
return false;
}
return true;
}
async saveDescriptionFiles(filename, data) {
console.log('TaJian try to save description files', data);
try {
const dirPath = path.resolve(this.save_dir);
//save title
let filepath = `${dirPath}/${filename}_title.txt`;
let content = data.title;
await writeFile(filepath, content, { encoding: 'utf8' });
filepath = `${dirPath}/${filename}_cover.txt`;
content = data.cover;
await writeFile(filepath, content, { encoding: 'utf8' });
}catch(error) {
console.error('Save description files failed: %s', error);
return false;
}
return true;
} }
} }

14
spider.mjs

@ -59,8 +59,18 @@ import cron from 'node-cron';
console.log('Data got by bot', data); console.log('Data got by bot', data);
if (typeof(data.done) != 'undefined' && data.done == true) { if (typeof(data.done) != 'undefined' && data.done == true) {
taskMoniter.setTaskDone(task.id); if (data.cover) {
await tajian.saveUrlShortcut(data); data.cover = common.getAbsoluteUrl(data.cover);
}
if (
await tajian.saveUrlShortcut(task.id, data)
&& await tajian.saveDescriptionFiles(task.id, data)
) {
taskMoniter.setTaskDone(task.id);
}else {
taskMoniter.setTaskFailed(task.id);
}
}else { }else {
taskMoniter.setTaskFailed(task.id); taskMoniter.setTaskFailed(task.id);
} }

25
test/tajia_test.mjs

@ -0,0 +1,25 @@
import TaJian from '../lib/tajian.mjs';
// Manual smoke test: pushes one sample Douyin result through TaJian and logs
// the boolean outcome of each save step. Exits with code 1 on an uncaught error.
async function main() {
    const sample = {
        url: 'https://v.douyin.com/i8sEyb6/',
        done: true,
        bot: 'douyin',
        title: '自由与成功 - 抖音',
        cover: '//p6-pc-sign.douyinpic.com/image-cut-tos-priv/d1b1e96513a755b2d6ff4cf8d8260f9b~tplv-dy-resize-origshort-autoq-75:330.jpeg?biz_tag=pcweb_cover&from=3213915784&s=PackSourceEnum_AWEME_DETAIL&sc=cover&se=false&x-expires=2010128400&x-signature=VuJiezXPv7y13fu63Krn9tIbLvQ%3D'
    };
    const baseName = 'douyintest';
    const store = new TaJian('../data/');

    const shortcutSaved = await store.saveUrlShortcut(baseName, sample);
    console.log('shortcut save done', shortcutSaved);

    const descriptionsSaved = await store.saveDescriptionFiles(baseName, sample);
    console.log('descriptions save done', descriptionsSaved);
}

main().catch((error) => {
    console.error("Error got:\n%s", error);
    process.exit(1);
});
Loading…
Cancel
Save