306 lines
9.2 KiB
JavaScript
Raw Normal View History

2020-02-21 20:05:52 -08:00
const path = require('path');
const glob = require('../lib/glob');
const { chunk, uniq, difference } = require('lodash');
const Promise = require('bluebird');
const fs = require('fs-extra');
const log = require('fancy-log');
const tweetparse = require('../lib/tweetparse');
const getEngines = require('./renderers');
const Twitter = require('twitter-lite');
const frontmatter = require('front-matter');
const createFileLoader = require('./files');
const { URL } = require('url');
// Base directory that relative paths in this module are resolved against:
// two levels above the directory containing this file.
const ROOT = path.resolve(__dirname, '../..');
exports.parse = async function parsePageContent () {
const [ files, twitter, twitterBackup, twitterCache, { siteInfo } ] = await Promise.all([
glob('pages/**/*.{md,hbs,html,xml}', { cwd: ROOT }),
fs.readJson(resolve('twitter-config.json')).catch(() => null)
.then(getTwitterClient),
fs.readJson(resolve('twitter-backup.json')).catch(() => {}),
fs.readJson(resolve('twitter-cache.json')).catch(() => {}),
fs.readJson(resolve('package.json')).catch(() => ({})),
]);
const loadFiles = createFileLoader();
const tweetsNeeded = [];
let pages = await Promise.map(files, async (filepath) => {
const { dir, name, ext } = path.parse(filepath);
const basename = path.basename(filepath);
// this is an include, skip it.
if (name[0] === '_') return;
const cwd = resolve(dir);
const input = resolve(filepath);
const outDir = path.join('dist', dir.slice(6));
const siteDir = `/${dir.slice(6)}`;
// if cwd === ROOT then we're in the bottom directory and there is no base
const base = path.relative(cwd, ROOT) && path.basename(dir);
/* Load Page Content **************************************************/
const [ raw, { ctime, mtime }, { images, titlecard } ] = await Promise.all([
fs.readFile(input).catch(() => null),
stat(input),
loadFiles(cwd, siteDir),
]);
// empty file
if (!raw) return;
try {
var { attributes: meta, body } = frontmatter(raw.toString('utf8'));
} catch (e) {
log.error('Error while parsing frontmatter for ' + filepath, e);
return;
}
// page is marked to be ignored, skip it.
if (meta.ignore) return;
meta.path = filepath;
meta.cwd = cwd;
meta.base = base;
meta.outDir = outDir;
meta.input = input;
meta.source = body;
meta.dateCreated = meta.date && new Date(meta.date) || ctime;
meta.dateModified = mtime;
meta.siteDir = siteDir;
meta.name = name;
meta.ext = ext;
meta.titlecard = titlecard;
meta.images = images;
var flags = new Set(meta.classes || []);
var isIndexPage = meta.isIndex = (name === 'index');
var isRootPage = meta.isRoot = (siteDir === '/');
var isCleanUrl = meta.isCleanUrl = [ '.hbs', '.md' ].includes(ext);
if ([ '.hbs', '.html', '.xml' ].includes(ext)) {
meta.engine = 'hbs';
} else if (ext === '.md') {
meta.engine = 'md';
} else {
meta.engine = 'raw';
}
flags.add(titlecard ? 'has-titlecard' : 'no-titlecard');
flags.add(meta.title ? 'has-title' : 'no-title');
flags.add(meta.subtitle ? 'has-subtitle' : 'no-subtitle');
flags.add(meta.description ? 'has-descrip' : 'no-descrip');
let slug, output, jsonOutput;
if (isRootPage) {
if (isCleanUrl) {
slug = '';
output = resolve(outDir, name, 'index.html');
jsonOutput = resolve(outDir, name + '.json');
} else {
slug = '';
output = resolve(outDir, basename);
jsonOutput = resolve(outDir, basename + '.json');
}
} else if (isCleanUrl) {
slug = name;
if (isIndexPage) {
output = resolve(outDir, 'index.html');
} else {
output = resolve(outDir, name, 'index.html');
}
jsonOutput = resolve(outDir, name + '.json');
} else {
slug = base;
output = resolve(outDir, basename);
jsonOutput = resolve(outDir, basename + '.json');
}
meta.slug = slug;
meta.output = output;
meta.json = jsonOutput;
const url = new URL(siteInfo.siteUrl);
if ([ '.hbs', '.md' ].includes(ext)) {
url.pathname = path.join(siteDir, slug);
} else if (isIndexPage) {
url.pathname = siteDir;
} else {
url.pathname = path.join(siteDir, path.basename(filepath));
}
meta.url = url.pathname;
meta.fullurl = url.toString();
/* Process Tweets **************************************************/
const tweets = [];
if (meta.tweet) {
meta.tweet = [ meta.tweet ].flat(1).map(parseTweetId);
tweets.push(...meta.tweet);
}
if (meta.tweets) {
meta.tweets = meta.tweets.map(parseTweetId);
tweets.push(...meta.tweets);
}
for (const id of tweets) {
if (!twitterCache[id]) {
tweetsNeeded.push(id);
}
}
meta.tweets = tweets;
flags.add(tweets.length ? 'has-tweets' : 'no-tweets');
/* Process Flags **************************************************/
meta.classes = Array.from(flags);
meta.flags = meta.classes.reduce((res, item) => {
var camelCased = item.replace(/-([a-z])/g, (g) => g[1].toUpperCase());
res[camelCased] = true;
return res;
}, {});
return meta;
});
pages = pages.filter(Boolean);
/* Load Missing Tweets **************************************************/
if (tweetsNeeded.length) {
log('Fetching tweets: ' + tweetsNeeded.join(', '));
const arriving = await Promise.all(chunk(uniq(tweetsNeeded), 99).map(twitter));
const loaded = [];
for (const tweet of arriving.flat(1)) {
if (!twitterBackup[tweet.id_str]) twitterBackup[tweet.id_str] = tweet;
twitterCache[tweet.id_str] = tweetparse(tweet);
loaded.push(tweet.id_str);
}
const absent = difference(tweetsNeeded, loaded);
for (const id of absent) {
if (twitterBackup[id]) {
log('Pulled tweet from backup ' + id);
twitterCache[id] = tweetparse(twitterBackup[id]);
continue;
}
log.error('Could not find tweet ' + id);
}
}
/* Apply Tweets to Pages **************************************************/
const twitterMedia = [];
// now loop through pages and substitute the tweet data for the ids
for (const page of pages) {
if (!page.tweets || !page.tweets.length) continue;
page.tweets = page.tweets.reduce((dict, tweetid) => {
const tweet = twitterCache[tweetid];
if (!tweet) {
log.error(`Tweet ${tweetid} is missing from the cache.`);
return dict;
}
dict[tweetid] = tweet;
twitterMedia.push( ...tweet.media );
return dict;
}, {});
}
await Promise.all([
fs.writeFile(path.join(ROOT, 'pages.json'), JSON.stringify(pages, null, 2)),
fs.writeFile(path.join(ROOT, 'twitter-media.json'), JSON.stringify(twitterMedia, null, 2)),
fs.writeFile(path.join(ROOT, 'twitter-cache.json'), JSON.stringify(twitterCache, null, 2)),
fs.writeFile(path.join(ROOT, 'twitter-backup.json'), JSON.stringify(twitterBackup, null, 2)),
]);
return pages;
};
exports.write = async function writePageContent ({ prod }) {
const [ pages, { siteInfo }, engines ] = await Promise.all([
fs.readJson(resolve('pages.json')),
fs.readJson(resolve('package.json')),
getEngines(prod),
]);
await Promise.map(pages, async (page) => {
var data = {
...page,
meta: page,
page: {
domain: siteInfo.domain,
title: page.title
? (page.title + (page.subtitle ? ', ' + page.subtitle : '') + ' :: ' + siteInfo.title)
: siteInfo.title,
},
local: {
cwd: page.cwd,
root: ROOT,
basename: path.basename(page.input),
},
pages,
};
const html = engines[page.engine](data.source, data).toString();
const json = page.json && {
url: page.fullurl,
title: page.title,
subtitle: page.subtitle,
description: page.description,
tweets: page.tweets,
images: page.images,
dateCreated: page.dateCreated,
dateModified: page.dateModified,
titlecard: page.titlecard,
};
await fs.ensureDir(path.dirname(page.output));
await Promise.all([
fs.writeFile(page.output, Buffer.from(html)),
json && fs.writeFile(page.json, Buffer.from(prod ? JSON.stringify(json) : JSON.stringify(json, null, 2))),
]);
});
};
/**
 * Production entry point: runs `exports.write` with `prod` switched on.
 *
 * @returns {Promise<void>} the promise returned by `exports.write`.
 */
exports.write.prod = function writePageContentForProduction () {
  const options = { prod: true };
  return exports.write(options);
};
/* Utility Functions **************************************************/
// Tweet permalink on twitter.com or x.com, optionally under the `www.` or
// `mobile.` subdomain, in both the `/status/` and `/statuses/` forms and with
// the legacy `#!/` prefix. Capture group 1 is the numeric status id.
const tweeturl = /https?:\/\/(?:(?:www|mobile)\.)?(?:twitter|x)\.com\/(?:#!\/)?(?:\w+)\/status(?:es)?\/(\d+)/i;

// A bare status id: digits only.
const tweetidcheck = /^\d+$/;

/**
 * Extracts a tweet id from a front matter value.
 *
 * @param {*} tweetid either a bare numeric id or a tweet permalink, as a string.
 *   Surrounding whitespace is ignored.
 * @returns {string|false} the id as a string of digits, or `false` when the
 *   value is not a string or is not recognised as a tweet reference.
 */
function parseTweetId (tweetid) {
  // we can't trust an id that isn't a string
  // (presumably because a numeric id may already have lost precision).
  if (typeof tweetid !== 'string') return false;

  // Values from multi-line YAML scalars can carry a trailing newline.
  const candidate = tweetid.trim();

  const match = candidate.match(tweeturl);
  if (match) return match[1];

  if (tweetidcheck.test(candidate)) return candidate;

  return false;
}
/**
 * Resolves a path against ROOT. A single leading `/` on the first segment is
 * dropped so that site-style paths such as `/pages/foo` stay inside ROOT
 * instead of being treated as absolute.
 *
 * @param {string} fpath first path segment.
 * @param {...string} args further segments handed to `path.resolve`.
 * @returns {string} the resolved absolute path.
 */
function resolve (fpath, ...args) {
  const firstSegment = fpath.startsWith('/') ? fpath.slice(1) : fpath;
  const segments = [ ROOT, firstSegment, ...args ];
  return path.resolve(...segments);
}
/**
 * Builds the function used to look up tweets by id.
 *
 * @param {object|null} config options handed to the `Twitter` constructor.
 *   When falsy, no client is created.
 * @returns {function(string[]): (Array|Promise<Array>)} a lookup function.
 *   Without a config it returns an empty array synchronously; with one it
 *   queries `statuses/lookup` and resolves to an empty array (after logging
 *   the error) when the request fails.
 */
function getTwitterClient (config) {
  if (!config) {
    return () => [];
  }

  const client = new Twitter(config);

  // Failed lookups are logged and treated as "nothing found".
  const onFailure = (err) => {
    log.error(err);
    return [];
  };

  return function lookupTweets (tweetids) {
    const query = { id: tweetids.join(','), tweet_mode: 'extended' };
    return client.get('statuses/lookup', query).catch(onFailure);
  };
}
/**
 * `fs.stat` that never rejects.
 *
 * @param {string} f path to inspect.
 * @returns {Promise<object|undefined>} the stats object, or `undefined` when
 *   the file cannot be stat'ed.
 */
const stat = async (f) => {
  try {
    return await fs.stat(f);
  } catch (err) {
    return undefined;
  }
};