From 39a1aa2f6f9afeff8e2dfcd08eb86d4b3f690246 Mon Sep 17 00:00:00 2001
From: "Jocelyn Badgley (Twipped)" <joc@twipped.com>
Date: Fri, 7 May 2021 11:10:29 -0700
Subject: [PATCH] Analytics tweaks

---
 .gitignore             |  2 +-
 analytics/package.json |  4 +++-
 analytics/queries.sql  | 12 +++++++++++-
 analytics/sync.js      | 43 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 analytics/sync.js

diff --git a/.gitignore b/.gitignore
index bd1f0c9..9e0d664 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,7 @@ node_modules
 /terraform/*.tfstate*
 /terraform/.terraform
 /terraform/files/*.zip
-/analytics/RAW*
+/analytics/*.gz
 /analytics/combined.log
 /analytics/database*
 /assets.json
diff --git a/analytics/package.json b/analytics/package.json
index 0fed993..9cfe17a 100644
--- a/analytics/package.json
+++ b/analytics/package.json
@@ -4,9 +4,10 @@
   "description": "",
   "main": "index.js",
   "scripts": {
+    "start": "node index.js",
     "sync": "aws s3 sync s3://gdbible-analytics/RAW ./RAW",
     "empty": "aws s3 rm s3://gdbible-analytics/RAW --recursive",
-    "combine": "find RAW -name '*.gz' -exec cat '{}' ';'| zcat | sed '/^#/ d' > combined.log"
+    "combine": "find RAW_ -name '*.gz' -exec cat '{}' ';' > combined.log"
   },
   "author": "Jocelyn Badgley <joc@twipped.com> (http://twipped.com/)",
   "license": "MIT",
@@ -15,6 +16,7 @@
     "date-fns": "~2.9.0",
     "glob-stream": "~6.1.0",
     "named-placeholders": "~1.1.2",
+    "readable-stream": "~3.6.0",
     "split2": "~3.2.2",
     "sqlite": "~4.0.19",
     "sqlite3": "~5.0.2",
diff --git a/analytics/queries.sql b/analytics/queries.sql
index c074ea4..0944230 100644
--- a/analytics/queries.sql
+++ b/analytics/queries.sql
@@ -14,7 +14,17 @@ HAVING total > 5;
 
 SELECT referrer_host, count(DISTINCT IFNULL(tid, ip)) as tids, referrer
 FROM records
-GROUP BY referrer_host;
+WHERE date(dts) > date('now', '-1 month')
+AND referrer_host != 'genderdysphoria.fyi'
+GROUP BY referrer_host
+ORDER BY tids DESC;
+
+SELECT referrer_host, count(DISTINCT IFNULL(tid, ip)) as tids, referrer -- distinct tid values, falling back to ip where tid is NULL
+FROM records
+WHERE date(dts) > date('now', '-1 day') -- keep rows whose date is later than the date one day before now
+AND INSTR(referrer_host, 'tiktok') -- INSTR is 0 (false) when 'tiktok' does not occur in the host
+GROUP BY referrer_host -- NOTE(review): referrer is neither grouped nor aggregated, so it comes from one unspecified row per group
+ORDER BY tids DESC; -- hosts with the most distinct visitors first
 
 SELECT COUNT(IFNULL(tid,ip)) as total, referrer
 FROM records
diff --git a/analytics/sync.js b/analytics/sync.js
new file mode 100644
index 0000000..38e2138
--- /dev/null
+++ b/analytics/sync.js
@@ -0,0 +1,43 @@
+var Readable = require('readable-stream').Readable;
+
+function identity (_in) { // default mapper for s3PageStream: hands its argument back unchanged
+  return _in;
+}
+
+/**
+ * Returns an object-mode Readable that emits every entry of `data.Contents`
+ * from each page of a paged AWS request, such as S3::listObjectsV2().
+ * Each entry is passed through `fn` before it is pushed (default: identity).
+ * @param {object} req - an unsent AWS request; this code only calls `req.send(callback)`
+ * @param {function} [fn] - maps each Contents entry to the value that is emitted
+ * @param {object} [opts] - Readable options; `read` and `objectMode` are always overridden
+ * @returns {Readable} stream that ends after the last page, or is destroyed on a request error
+ */
+function s3PageStream (req, fn, opts) {
+  opts = Object.assign({}, opts, { read, objectMode: true }); // fresh copy; the later keys win over caller-supplied ones
+  if (!fn) fn = identity;
+
+  var stream = new Readable(opts);
+
+  return stream;
+
+  function read () { // function declaration is hoisted, so it is already defined where opts is built above
+    if (!req) return; // first request already sent; later pages are chained from page_handler
+
+    var _req = req;
+    req = null; // poor man's once!
+    _req.send(page_handler);
+  }
+
+  function page_handler (e, data) {
+    if (e) return stream.destroy(e); // surface the request error on the stream
+    data.Contents.forEach((obj) => {
+      stream.push(fn(obj)); // NOTE(review): push()'s return value is ignored, so paging never pauses for a slow consumer
+    });
+
+    var nextPage = this.hasNextPage() ? this.nextPage() : null; // presumably `this` is the AWS response object bound by send() — confirm against the SDK
+    if (nextPage) nextPage.send(page_handler);
+    else stream.push(null); // null marks end-of-stream
+  }
+}
+