diff --git a/config.mjs b/config.mjs
index 4678057..bdf771e 100644
--- a/config.mjs
+++ b/config.mjs
@@ -1,7 +1,7 @@
 export default {
     //自动任务相关配置
     task_list_dir: 'todo/', //待抓取任务文件保存目录
-    task_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io
+    data_save_dir: 'data/', //抓取完成数据保存目录,文件格式:.url快捷方式,详细说明见:https://filesite.io
 
 
     //bot相关配置
diff --git a/lib/common.mjs b/lib/common.mjs
index 7fc60d3..3a96960 100644
--- a/lib/common.mjs
+++ b/lib/common.mjs
@@ -37,4 +37,19 @@ export default {
         return botName;
     },
 
+    /**
+     * Turn a protocol-relative URL (one starting with "//") into an absolute
+     * https URL. Any other value is returned unchanged.
+     *
+     * @param {string} url - URL to normalize.
+     * @returns {string} The URL prefixed with "https:" when it started with "//".
+     */
+    getAbsoluteUrl: function(url) {
+        if (/^\/\//.test(url)) {
+            url = `https:${url}`;
+        }
+
+        return url;
+    },
+
 };
diff --git a/lib/tajian.mjs b/lib/tajian.mjs
index f825f80..7371c10 100644
--- a/lib/tajian.mjs
+++ b/lib/tajian.mjs
@@ -2,6 +2,10 @@
  * 基于Bot跟Machete的TaJian对接
  * 按filestie.io标准把Bot爬虫返回的数据格式化保存为.url及其说明文件
  */
+import common from './common.mjs';
+import fs from 'node:fs';
+import { writeFile } from 'node:fs/promises';
+import path from 'node:path';
 
 class TaJian {
 
@@ -9,9 +13,63 @@ class TaJian {
         this.save_dir = data_save_dir;
     }
 
-    async saveUrlShortcut(data) {
-        console.log('TaJian try to save data', data);
+    /**
+     * Save the crawled link as an Internet shortcut (.url) file in the save directory.
+     *
+     * File content example:
+     *   [InternetShortcut]
+     *   URL=https://microsoft.com/
+     *
+     * @param {string} filename - Base file name without extension (the spider passes the task id).
+     * @param {object} data - Bot result; `data.url` is the link to store.
+     * @returns {Promise<boolean>} true when the file was written, false on any failure.
+     */
+    async saveUrlShortcut(filename, data) {
+        console.log('TaJian try to save shortcut url', data);
+
+        try {
+            //never write a shortcut whose target is "undefined" or empty
+            if (!data.url) {
+                throw new Error('data.url is empty');
+            }
+
+            const filepath = path.join(path.resolve(this.save_dir), `${filename}.url`);
+            const shortUrlContent = `[InternetShortcut]\nURL=${data.url}\n`;
+            await writeFile(filepath, shortUrlContent, { encoding: 'utf8' });
+        }catch(error) {
+            console.error('Save short url file failed: %s', error);
+            return false;
+        }
+
+        return true;
+    }
+
+    /**
+     * Save the title and the cover URL of a crawled link as "<filename>_title.txt"
+     * and "<filename>_cover.txt" in the save directory.
+     *
+     * @param {string} filename - Base file name without extension.
+     * @param {object} data - Bot result; `data.title` and `data.cover` are written as-is.
+     * @returns {Promise<boolean>} true when both files were written, false on any failure.
+     */
+    async saveDescriptionFiles(filename, data) {
+        console.log('TaJian try to save description files', data);
+
+        try {
+            //validate first so a missing field cannot leave a half-written set of files
+            if (data.title == null || data.cover == null) {
+                throw new Error('data.title or data.cover is missing');
+            }
+
+            const dirPath = path.resolve(this.save_dir);
+            await writeFile(path.join(dirPath, `${filename}_title.txt`), data.title, { encoding: 'utf8' });
+            await writeFile(path.join(dirPath, `${filename}_cover.txt`), data.cover, { encoding: 'utf8' });
+        }catch(error) {
+            console.error('Save description files failed: %s', error);
+            return false;
+        }
 
+        return true;
     }
 
 }
diff --git a/spider.mjs b/spider.mjs
index 7dd3648..93e252b 100644
--- a/spider.mjs
+++ b/spider.mjs
@@ -59,8 +59,18 @@ import cron from 'node-cron';
         console.log('Data got by bot', data);
 
         if (typeof(data.done) != 'undefined' && data.done == true) {
-            taskMoniter.setTaskDone(task.id);
-            await tajian.saveUrlShortcut(data);
+            if (data.cover) {
+                data.cover = common.getAbsoluteUrl(data.cover);
+            }
+
+            if (
+                await tajian.saveUrlShortcut(task.id, data)
+                && await tajian.saveDescriptionFiles(task.id, data)
+            ) {
+                taskMoniter.setTaskDone(task.id);
+            }else {
+                taskMoniter.setTaskFailed(task.id);
+            }
         }else {
             taskMoniter.setTaskFailed(task.id);
         }
diff --git a/test/tajia_test.mjs b/test/tajia_test.mjs
new file mode 100644
index 0000000..f0045d4
--- /dev/null
+++ b/test/tajia_test.mjs
@@ -0,0 +1,25 @@
+import TaJian from '../lib/tajian.mjs';
+
+(async () => {
+
+    const data = {
+        url: 'https://v.douyin.com/i8sEyb6/',
+        done: true,
+        bot: 'douyin',
+        title: '自由与成功 - 抖音',
+        cover: '//p6-pc-sign.douyinpic.com/image-cut-tos-priv/d1b1e96513a755b2d6ff4cf8d8260f9b~tplv-dy-resize-origshort-autoq-75:330.jpeg?biz_tag=pcweb_cover&from=3213915784&s=PackSourceEnum_AWEME_DETAIL&sc=cover&se=false&x-expires=2010128400&x-signature=VuJiezXPv7y13fu63Krn9tIbLvQ%3D'
+    };
+    const filename = 'douyintest';
+
+    const tajian = new TaJian('../data/');
+
+    const saveUrlDone = await tajian.saveUrlShortcut(filename, data);
+    console.log('shortcut save done', saveUrlDone);
+
+    const saveDescDone = await tajian.saveDescriptionFiles(filename, data);
+    console.log('descriptions save done', saveDescDone);
+
+})().catch(error => {
+    console.error("Error got:\n%s", error);
+    process.exit(1);
+});