mirror of
https://github.com/GenderDysphoria/GenderDysphoria.fyi.git
synced 2025-01-31 07:16:17 +00:00
196 lines
6.3 KiB
JavaScript
196 lines
6.3 KiB
JavaScript
const { chunk, uniq, uniqBy, difference } = require('lodash');
|
|
const fs = require('fs-extra');
|
|
const { resolve } = require('./resolve');
|
|
const log = require('fancy-log');
|
|
const tweetparse = require('./lib/tweetparse');
|
|
const Twitter = require('twitter-lite');
|
|
const { hasOwn } = require('./lib/util');
|
|
|
|
/**
 * Builds a shallow copy of a tweet with its translations attached.
 *
 * When the translation table has no entry for the tweet, any translation
 * fields already on the tweet are removed from the copy. Otherwise the copy
 * receives the translated texts, plus the tweet's own text filed under its
 * original language (`tweet.lang`, or 'x-original' when no language is set).
 *
 * Neither argument is modified: the tweet and the translation map are copied
 * before anything is written to them.
 *
 * @param {object} original_tweet - Tweet object; `id_str`, `lang` and `full_text` are read.
 * @param {object} twitter_i18n - Translation table keyed by tweet id; each entry may carry a `full_text_i18n` map.
 * @returns {object} The copied tweet, with or without `full_text_i18n`.
 */
function applyI18N (original_tweet, twitter_i18n) {
  const id = original_tweet.id_str;
  // Make a shallow copy so the caller's tweet object is left untouched
  const tweet = Object.assign({}, original_tweet);

  // Do we have a translation for this tweet?
  if (twitter_i18n[id] === undefined) {
    // If not, delete any translation fields just in case
    delete tweet.html_i18n;
    delete tweet.full_text_i18n;
    return tweet;
  }

  // If yes, add the translations. The map is copied because the original text
  // is written into it below; assigning it by reference would silently modify
  // the shared translation table. Copying also tolerates an entry that has no
  // `full_text_i18n` at all.
  const originalLang = tweet.lang || 'x-original';
  tweet.full_text_i18n = Object.assign({}, twitter_i18n[id].full_text_i18n);

  if (originalLang in tweet.full_text_i18n && tweet.full_text_i18n[originalLang] !== tweet.full_text) {
    // The table carries a different text for the original language: keep the
    // table's version, but make the discrepancy visible.
    log.warn('Original text not matching for tweet ' + id, { expected: tweet.full_text, got: tweet.full_text_i18n[originalLang] });
  } else {
    tweet.full_text_i18n[originalLang] = tweet.full_text;
  }

  // Return the tweet with the translations
  return tweet;
}
|
|
|
|
module.exports = exports = async function tweets (pages) {
|
|
const [ twitter, twitterBackup, twitterCache ] = await Promise.all([
|
|
fs.readJson(resolve('twitter-config.json')).catch(() => null)
|
|
.then(getTwitterClient),
|
|
fs.readJson(resolve('twitter-backup.json')).catch(() => ({})),
|
|
fs.readJson(resolve('twitter-cache.json')).catch(() => ({})),
|
|
]);
|
|
|
|
let tweetsNeeded = [];
|
|
const tweetsPresent = Object.keys(twitterCache);
|
|
|
|
for (const page of pages) {
|
|
const tweetids = [ ...page.tweets ];
|
|
if (!tweetids.length) continue;
|
|
|
|
const missing = difference(uniq(tweetids), tweetsPresent);
|
|
tweetsNeeded.push(...missing);
|
|
}
|
|
|
|
tweetsNeeded = uniq(tweetsNeeded);
|
|
|
|
/* Load Missing Tweets **************************************************/
|
|
|
|
while (tweetsNeeded.length) {
|
|
// log('Fetching tweets: ' + tweetsNeeded.join(', '));
|
|
// const arriving = await Promise.all(chunk(tweetsNeeded, 99).map(twitter));
|
|
const arriving = [];
|
|
const tweetsRequested = tweetsNeeded;
|
|
tweetsNeeded = [];
|
|
const loaded = [];
|
|
for (const tweet of arriving.flat(1)) {
|
|
if (tweet.quoted_status_id_str && !twitterCache[tweet.quoted_status_id_str]) {
|
|
tweetsNeeded.push(tweet.quoted_status_id_str);
|
|
}
|
|
twitterBackup[tweet.id_str] = tweet;
|
|
twitterCache[tweet.id_str] = tweetparse(tweet);
|
|
loaded.push(tweet.id_str);
|
|
}
|
|
|
|
const absent = difference(tweetsRequested, loaded);
|
|
for (const id of absent) {
|
|
if (!id) continue;
|
|
if (!hasOwn(twitterBackup, id)) {
|
|
log.error('Could not find tweet ' + id);
|
|
continue;
|
|
}
|
|
const tweet = twitterBackup[id];
|
|
|
|
if (tweet.quoted_status_id_str && !twitterCache[tweet.quoted_status_id_str]) {
|
|
tweetsNeeded.push(tweet.quoted_status_id_str);
|
|
}
|
|
|
|
if (tweet) {
|
|
log('Pulled tweet from backup ' + id);
|
|
twitterCache[id] = tweetparse(twitterBackup[id]);
|
|
} else {
|
|
twitterCache[id] = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Apply Tweets to Pages **************************************************/
|
|
|
|
var twitterMedia = [];
|
|
|
|
function attachTweet (dict, tweetid) {
|
|
if (!hasOwn(twitterCache, tweetid) && twitterBackup[tweetid]) {
|
|
log.error(`Tweet ${tweetid} is missing from the cache but exists in backup? How did we get here?`);
|
|
twitterCache[tweetid] = tweetparse(twitterBackup[tweetid]);
|
|
return;
|
|
}
|
|
const tweet = twitterCache[tweetid];
|
|
if (!tweet) return;
|
|
dict[tweetid] = tweet;
|
|
twitterMedia.push( ...tweet.media );
|
|
|
|
if (tweet.quoted_status_id_str) attachTweet(dict, tweet.quoted_status_id_str);
|
|
}
|
|
|
|
// now loop through pages and substitute the tweet data for the ids
|
|
for (const page of pages) {
|
|
const tweetids = [ ...page.tweets ];
|
|
if (!tweetids.length) continue;
|
|
|
|
page.tweets = tweetids.reduce((dict, tweetid) => {
|
|
attachTweet(dict, tweetid);
|
|
return dict;
|
|
}, {});
|
|
}
|
|
|
|
twitterMedia = uniqBy(twitterMedia, 'output');
|
|
|
|
await Promise.all([
|
|
fs.writeFile(resolve('twitter-media.json'), JSON.stringify(twitterMedia, null, 2)),
|
|
fs.writeFile(resolve('twitter-cache.json'), JSON.stringify(twitterCache, null, 2)),
|
|
fs.writeFile(resolve('twitter-backup.json'), JSON.stringify(twitterBackup, null, 2)),
|
|
]);
|
|
|
|
return pages;
|
|
};
|
|
|
|
/* Utility Functions **************************************************/
|
|
|
|
/**
 * Creates the tweet lookup function used to fetch tweets by id.
 *
 * Without a configuration the returned function yields an empty array, so
 * callers can run without credentials. With one, it queries the
 * `statuses/lookup` endpoint in extended mode; a failed request is logged
 * and resolves to an empty array instead of rejecting.
 *
 * @param {object|null} config - Options passed to the Twitter client constructor, or a falsy value.
 * @returns {Function} Function taking an array of tweet ids.
 */
function getTwitterClient (config) {
  const hasConfig = Boolean(config);
  if (!hasConfig) {
    return () => [];
  }

  const client = new Twitter(config);

  // Report the failure and carry on as if nothing was found
  const onFailure = (err) => {
    log.error(err);
    return [];
  };

  return function (tweetids) {
    const params = { id: tweetids.join(','), tweet_mode: 'extended' };
    return client.get('statuses/lookup', params).catch(onFailure);
  };
}
|
|
|
|
// Matches a tweet permalink and captures the numeric status id. Accepts the
// bare domain as well as the `www.` and `mobile.` subdomains, on both
// twitter.com and x.com, with the legacy `#!/` prefix and `statuses` spelling.
const tweeturl = /https?:\/\/(?:(?:www|mobile)\.)?(?:twitter|x)\.com\/(?:#!\/)?(?:\w+)\/status(?:es)?\/(\d+)/i;
// A bare tweet id: digits only.
const tweetidcheck = /^\d+$/;

/**
 * Extracts a tweet id from a tweet URL or a bare numeric id string.
 *
 * @param {*} tweetid - Candidate value; anything that is not a string is rejected.
 * @returns {string|false} The id as a string of digits, or `false` when none could be extracted.
 */
function parseTweetId (tweetid) {
  // we can't trust an id that isn't a string
  if (typeof tweetid !== 'string') return false;

  const match = tweetid.match(tweeturl);
  if (match) return match[1];
  if (tweetidcheck.test(tweetid)) return tweetid;
  return false;
}
|
|
|
|
// Expose the id parser as a property of the exported tweets() function
exports.parseTweetId = parseTweetId;
|
|
|
|
exports.attachTweets = function (tweetids, tweets) {
|
|
function attachTweet (dict, tweetid) {
|
|
const tweet = tweets[tweetid];
|
|
if (!tweet) return;
|
|
dict[tweetid] = tweet;
|
|
|
|
if (tweet.quoted_status_id_str) attachTweet(dict, tweet.quoted_status_id_str);
|
|
}
|
|
|
|
return tweetids.reduce((dict, tweetid) => {
|
|
attachTweet(dict, tweetid);
|
|
return dict;
|
|
}, {});
|
|
};
|
|
|
|
exports.i18n = async function () {
|
|
const [ twitterBackup, twitterCache, twitterI18N ] = await Promise.all([
|
|
fs.readJson(resolve('twitter-backup.json')),
|
|
fs.readJson(resolve('twitter-cache.json')).catch(() => ({})),
|
|
fs.readJson(resolve('twitter-i18n.json')),
|
|
]);
|
|
|
|
const twitterCacheBkp = JSON.stringify(twitterCache, null, 2);
|
|
|
|
// Make sure no translation is forgotten
|
|
for (const id of Object.keys(twitterI18N)) {
|
|
if (twitterBackup[id]) {
|
|
twitterCache[id] = applyI18N(twitterBackup[id], twitterI18N);
|
|
twitterCache[id] = tweetparse(twitterCache[id]);
|
|
}
|
|
}
|
|
|
|
const twitterCacheJson = JSON.stringify(twitterCache, null, 2);
|
|
if (twitterCacheBkp !== twitterCacheJson) {
|
|
await fs.writeFile(resolve('twitter-cache.json'), twitterCacheJson);
|
|
}
|
|
};
|