diff --git a/.gitignore b/.gitignore
index 1d8cd50..ab9b301 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,5 +14,6 @@ node_modules
 /terraform/*.tfstate*
 /terraform/.terraform
 /terraform/files/*.zip
-
+/analytics/RAW/*
+/analytics/combined.log
 /assets.json
diff --git a/analytics/.eslintrc b/analytics/.eslintrc
new file mode 100644
index 0000000..0eea9ea
--- /dev/null
+++ b/analytics/.eslintrc
@@ -0,0 +1,12 @@
+{
+  "extends": "twipped/node",
+  "rules": {
+    "node/no-unpublished-require": 0,
+    'indent': [ 2, 2, {
+      'MemberExpression': 1,
+    } ],
+    'node/no-unsupported-features/es-syntax': [ 'error' ],
+    'node/no-unsupported-features/es-builtins': [ 'error' ],
+    'node/no-unsupported-features/node-builtins': [ 'error' ],
+  }
+}
diff --git a/analytics/index.js b/analytics/index.js
new file mode 100644
index 0000000..a7f8f07
--- /dev/null
+++ b/analytics/index.js
@@ -0,0 +1,110 @@
+/* eslint no-console:0 */
+
+const util = require('util');
+const path = require('path');
+const { URL } = require('url');
+const CloudFrontParser = require('cloudfront-log-parser');
+const parseUA = require('ua-parser-js');
+const format = require('date-fns/format');
+const split = require('split2');
+const through = require('through2');
+
+/**
+ * Parses a URL string into a plain object holding its components.
+ *
+ * @param {string} input - URL string to parse.
+ * @returns {Object|null} The parsed components, or null when `new URL(input)` throws.
+ */
+function url (input) {
+  try {
+    const { hash, host, hostname, href, origin, password, pathname, port, protocol, search, searchParams, username } = new URL(input); // eslint-disable-line max-len
+    return { hash, host, hostname, href, origin, password, pathname, port, protocol, search, searchParams, username };
+  } catch (e) {
+    return null;
+  }
+}
+
+/**
+ * Formats a millisecond timestamp as `yyyy-MM-dd HH:mm:ss`.
+ *
+ * @param {number} ms - Timestamp in milliseconds, as accepted by `new Date(ms)`.
+ * @returns {string|null} The formatted date, or null when `ms` does not produce
+ *   a valid Date, so a malformed hit cannot make `format` throw.
+ */
+function timestamp (ms) {
+  const date = new Date(ms);
+  return Number.isNaN(date.getTime()) ? null : format(date, 'yyyy-MM-dd HH:mm:ss');
+}
+
+/**
+ * Builds an object-mode through2 stream whose handlers are async functions.
+ * A rejected handler is logged and its error is handed to the stream callback.
+ *
+ * @param {Function} fn - Async transform, called as `fn(stream, chunk, enc)`.
+ * @param {Function} [donefn] - Optional async flush handler, called as `donefn(stream)`.
+ * @returns {Object} The stream returned by `through.obj`.
+ */
+function asyncthrough (...args) {
+  const [ fn, donefn ] = args;
+
+  args[0] = function (file, enc, next) {
+    fn(this, file, enc).then(() => next(), (err) => { console.error(err, 'Error thrown'); next(err); });
+  };
+
+  if (donefn) {
+    args[1] = function (next) {
+      donefn(this).then(() => next(), (err) => { console.error(err, 'Error thrown'); next(err); });
+    };
+  }
+
+  return through.obj(...args);
+}
+
+const parser = new CloudFrontParser({ format: 'web' });
+
+// Reads CloudFront access logs on stdin and writes one JSON document per
+// analytics pixel hit to stdout.
+process.stdin
+  .pipe(parser)
+  .pipe(asyncthrough(async (stream, row) => {
+    // filter out OPTIONS calls
+    if (row['cs-method'] === 'OPTIONS') return null;
+
+    // I only care about the pixel hits, nothing else.
+    if (row['cs-uri-stem'] !== '/i') return null;
+
+    // this isn't an analytics event
+    if (row['cs-referer'] === '-') return null;
+
+    // swap dashes for underscores so the fields read as plain property names
+    row = Object.fromEntries(Object.entries(row).map(([ k, v ]) => [ k.replace(/-/g, '_'), v ]));
+
+    const query = (row.cs_uri_query === '-')
+      ? {}
+      : Object.fromEntries(new URLSearchParams(row.cs_uri_query))
+    ;
+
+    // we didn't get analytics data from this load, ignore it
+    if (!query.start) return null;
+
+    const useragent = parseUA(row.cs_user_agent);
+
+    const sessionStart = Number(query.start);
+    const sessionEnd = query.end === 'null' ? 0 : Number(query.end);
+    const duration = sessionEnd > sessionStart ? Math.floor((sessionEnd - sessionStart) / 1000) : null;
+
+    stream.push(JSON.stringify({
+      dts: `${row.date} ${row.time}`,
+      url: url(row.cs_referer),
+      referer: url(query.referer),
+      client_start: timestamp(sessionStart),
+      client_end: sessionEnd ? timestamp(sessionEnd) : null,
+      duration,
+      useragent,
+      query,
+      original: row,
+    }, null, 2));
+  }))
+  .pipe(process.stdout)
+;
+
diff --git a/analytics/package-lock.json b/analytics/package-lock.json
new file mode 100644
index 0000000..3c933c0
--- /dev/null
+++ b/analytics/package-lock.json
@@ -0,0 +1,72 @@
+{
+  "name": "decorate",
+  "version": "1.0.0",
+  "lockfileVersion": 1,
+  "requires": true,
+  "dependencies": {
+    "cloudfront-log-parser": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/cloudfront-log-parser/-/cloudfront-log-parser-1.1.0.tgz",
+      "integrity": "sha512-g1lxh8aW5ZrQ7/loX2/vLzz4SWefQhSvZw++wgoIx3aEugXHKyfYaWOXGS4pNNp9hi7JcXITxLYqBI2FY+jtgA=="
+    },
+    "date-fns": {
+      "version": "2.9.0",
+      "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.9.0.tgz",
+      "integrity": "sha512-khbFLu/MlzLjEzy9Gh8oY1hNt/Dvxw3J6Rbc28cVoYWQaC1S3YI4xwkF9ZWcjDLscbZlY9hISMr66RFzZagLsA=="
+    },
+    "inherits": {
+      "version": "2.0.4",
+      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
+      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
+    },
+    "readable-stream": {
+      "version": "3.6.0",
+      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz",
+      "integrity": "sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA==",
+      "requires": {
+        "inherits": "^2.0.3",
+        "string_decoder": "^1.1.1",
+        "util-deprecate": "^1.0.1"
+      }
+    },
+    "safe-buffer": {
+      "version": "5.2.1",
+      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
+      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ=="
+    },
+    "split2": {
+      "version": "3.2.2",
+      "resolved": "https://registry.npmjs.org/split2/-/split2-3.2.2.tgz",
+      "integrity": "sha512-9NThjpgZnifTkJpzTZ7Eue85S49QwpNhZTq6GRJwObb6jnLFNGB7Qm73V5HewTROPyxD0C29xqmaI68bQtV+hg==",
+      "requires": {
+        "readable-stream": "^3.0.0"
+      }
+    },
+    "string_decoder": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
+      "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
+      "requires": {
+        "safe-buffer": "~5.2.0"
+      }
+    },
+    "through2": {
+      "version": "4.0.2",
+      "resolved": "https://registry.npmjs.org/through2/-/through2-4.0.2.tgz",
+      "integrity": "sha512-iOqSav00cVxEEICeD7TjLB1sueEL+81Wpzp2bY17uZjZN0pWZPuo4suZ/61VujxmqSGFfgOcNuTZ85QJwNZQpw==",
+      "requires": {
+        "readable-stream": "3"
+      }
+    },
+    "ua-parser-js": {
+      "version": "0.7.21",
+      "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.21.tgz",
+      "integrity": "sha512-+O8/qh/Qj8CgC6eYBVBykMrNtp5Gebn4dlGD/kKXVkJNDwyrAwSIqwz8CDf+tsAIWVycKcku6gIXJ0qwx/ZXaQ=="
+    },
+    "util-deprecate": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
+      "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
+    }
+  }
+}
diff --git a/analytics/package.json b/analytics/package.json
new file mode 100644
index 0000000..9176d75
--- /dev/null
+++ b/analytics/package.json
@@ -0,0 +1,23 @@
+{
+  "name": "decorate",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "sync": "aws s3 sync s3://gdbible-analytics/RAW ./RAW",
+    "combine": "find RAW -name '*.gz' -exec cat '{}' ';'| zcat | sed '/^#/ d' > combined.log"
+  },
+  "author": "Jocelyn Badgley (http://twipped.com/)",
+  "license": "MIT",
+  "dependencies": {
+    "cloudfront-log-parser": "~1.1.0",
+    "date-fns": "~2.9.0",
+    "split2": "~3.2.2",
+    "through2": "~4.0.2",
+    "ua-parser-js": "~0.7.21"
+  },
+  "engines": {
+    "node": ">=12.14.0"
+  },
+  "devDependencies": {}
+}