diff --git a/.gitignore b/.gitignore
index bd1f0c9..9e0d664 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,7 @@ node_modules
 /terraform/*.tfstate*
 /terraform/.terraform
 /terraform/files/*.zip
-/analytics/RAW*
+/analytics/*.gz
 /analytics/combined.log
 /analytics/database*
 /assets.json
diff --git a/analytics/package.json b/analytics/package.json
index 0fed993..9cfe17a 100644
--- a/analytics/package.json
+++ b/analytics/package.json
@@ -4,9 +4,10 @@
   "description": "",
   "main": "index.js",
   "scripts": {
+    "start": "node index.js",
     "sync": "aws s3 sync s3://gdbible-analytics/RAW ./RAW",
     "empty": "aws s3 rm s3://gdbible-analytics/RAW --recursive",
-    "combine": "find RAW -name '*.gz' -exec cat '{}' ';'| zcat | sed '/^#/ d' > combined.log"
+    "combine": "find RAW_ -name '*.gz' -exec cat '{}' ';' > combined.log"
   },
   "author": "Jocelyn Badgley (http://twipped.com/)",
   "license": "MIT",
@@ -15,6 +16,7 @@
     "date-fns": "~2.9.0",
     "glob-stream": "~6.1.0",
     "named-placeholders": "~1.1.2",
+    "readable-stream": "~3.6.0",
     "split2": "~3.2.2",
     "sqlite": "~4.0.19",
     "sqlite3": "~5.0.2",
diff --git a/analytics/queries.sql b/analytics/queries.sql
index c074ea4..0944230 100644
--- a/analytics/queries.sql
+++ b/analytics/queries.sql
@@ -14,7 +14,17 @@ HAVING total > 5;
 
 SELECT referrer_host, count(DISTINCT IFNULL(tid, ip)) as tids, referrer
 FROM records
-GROUP BY referrer_host;
+WHERE date(dts) > date('now', '-1 month')
+AND referrer_host != 'genderdysphoria.fyi'
+GROUP BY referrer_host
+ORDER BY tids DESC;
+
+SELECT referrer_host, count(DISTINCT IFNULL(tid, ip)) as tids, referrer
+FROM records
+WHERE date(dts) > date('now', '-1 day')
+AND INSTR(referrer_host, 'tiktok')
+GROUP BY referrer_host
+ORDER BY tids DESC;
 
 SELECT COUNT(IFNULL(tid,ip)) as total, referrer
 FROM records
diff --git a/analytics/sync.js b/analytics/sync.js
new file mode 100644
index 0000000..38e2138
--- /dev/null
+++ b/analytics/sync.js
@@ -0,0 +1,70 @@
+var Readable = require('readable-stream').Readable;
+
+/**
+ * Default mapper: hands each entry back unchanged.
+ *
+ * @param {*} _in - value to pass through
+ * @returns {*} the same value
+ */
+function identity (_in) {
+  return _in;
+}
+
+/**
+ * Returns an object-mode Readable stream for any paged AWS request,
+ * like S3::listObjectsV2(). You can optionally provide a mapping function
+ * that is applied to every entry of each page's `Contents` array.
+ *
+ * Pages are requested lazily: a further page is only fetched while push()
+ * reports room in the stream buffer, or when the stream asks to read again.
+ *
+ * @param {object} req - a non executed AWS request (must offer `send(callback)`)
+ * @param {function} [fn] - selects/maps each entry; return null or undefined
+ *   to leave an entry out. Defaults to passing entries through unchanged.
+ * @param {object} [opts] - stream options (`read` and `objectMode` are
+ *   always overridden)
+ * @returns {Readable} stream emitting one chunk per kept entry
+ */
+function s3PageStream (req, fn, opts) {
+  opts = Object.assign({}, opts, { read, objectMode: true });
+  if (!fn) fn = identity;
+
+  var stream = new Readable(opts);
+  var pending = req; // next request to send; null once nothing is left
+  var inFlight = false; // true while a page callback is outstanding
+
+  return stream;
+
+  function read () {
+    if (inFlight || !pending) return;
+
+    var _req = pending;
+    pending = null; // poor man's once!
+    inFlight = true;
+    _req.send(page_handler);
+  }
+
+  function page_handler (e, data) {
+    inFlight = false;
+    if (e) return stream.destroy(e);
+    // The stream was torn down while this page was loading: stop paging.
+    if (stream.destroyed) return;
+
+    var hasRoom = true;
+    var contents = (data && data.Contents) || [];
+    contents.forEach((obj) => {
+      var mapped = fn(obj);
+      // push(null) means end-of-stream, so nullish results are dropped
+      if (mapped == null) return;
+      hasRoom = stream.push(mapped);
+    });
+
+    // `this` is expected to be the AWS response, which exposes paging helpers
+    pending = this.hasNextPage() ? this.nextPage() : null;
+    if (!pending) return stream.push(null);
+
+    // When the buffer is full, wait for the stream to call read() again
+    if (hasRoom) read();
+  }
+}
+