mirror of
https://github.com/GenderDysphoria/GenderDysphoria.fyi.git
synced 2025-01-31 07:16:17 +00:00
Working on analytics parsing
This commit is contained in:
parent
3f6077eb18
commit
98323f3316
4
.gitignore
vendored
4
.gitignore
vendored
@ -14,7 +14,9 @@ node_modules
|
|||||||
/terraform/*.tfstate*
|
/terraform/*.tfstate*
|
||||||
/terraform/.terraform
|
/terraform/.terraform
|
||||||
/terraform/files/*.zip
|
/terraform/files/*.zip
|
||||||
/analytics/RAW/*
|
/analytics/RAW*
|
||||||
/analytics/combined.log
|
/analytics/combined.log
|
||||||
|
/analytics/database*
|
||||||
/assets.json
|
/assets.json
|
||||||
/published
|
/published
|
||||||
|
/analytics/database.sqlite
|
||||||
|
131
analytics/fs.js
Normal file
131
analytics/fs.js
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
/**
|
||||||
|
*
|
||||||
|
* @twipped/utils
|
||||||
|
*
|
||||||
|
* Copyright (c) 2020, Jocelyn Badgley
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
* a copy of this software and associated documentation files (the
|
||||||
|
* "Software"), to deal in the Software without restriction, including
|
||||||
|
* without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
* permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
* the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||||
|
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
'use strict';
|
||||||
|
|
||||||
|
Object.defineProperty(exports, '__esModule', { value: true });
|
||||||
|
|
||||||
|
var path = require('path');
|
||||||
|
var fs = require('fs');
|
||||||
|
var stream = require('stream');
|
||||||
|
var util = require('util');
|
||||||
|
|
||||||
|
function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
|
||||||
|
|
||||||
|
var stream__default = /*#__PURE__*/_interopDefaultLegacy(stream);
|
||||||
|
|
||||||
|
const pipeline = util.promisify(stream__default['default'].pipeline);
|
||||||
|
/**
 * Create a directory.
 *
 * @param {string} f - Directory path to create.
 * @param {boolean} [recursive=true] - Also create missing parent directories.
 * @returns {Promise<string|undefined>} Whatever `fs.promises.mkdir` resolves with.
 */
const mkdir = (f, recursive = true) => fs.promises.mkdir(f, { recursive });

/**
 * Check whether a path is accessible.
 *
 * @param {string} f - Path to probe.
 * @returns {Promise<boolean>} `true` when `fs.promises.access` succeeds, `false` on any error.
 */
const exists = (f) => fs.promises.access(f).then(() => true, () => false);

/**
 * `fs.promises.stat` that never rejects.
 *
 * @param {string} f - Path to stat.
 * @returns {Promise<fs.Stats|null>} The stats, or `null` when the call fails for any reason.
 */
const stat = (f) => fs.promises.stat(f).catch(() => null);

/**
 * `fs.promises.lstat` that never rejects.
 *
 * @param {string} f - Path to lstat.
 * @returns {Promise<fs.Stats|null>} The stats, or `null` when the call fails for any reason.
 */
const linkStat = (f) => fs.promises.lstat(f).catch(() => null);
|
||||||
|
/**
 * Determine whether `file` can be written.
 *
 * An existing path must pass an F_OK|W_OK access check. When the path does
 * not exist (ENOENT), the same check is applied to its parent directory
 * instead. Any other access failure yields `false`.
 *
 * @param {string} file - Path to test.
 * @returns {Promise<boolean>}
 */
async function isWritable(file) {
  const mode = fs.constants.F_OK | fs.constants.W_OK;

  try {
    await fs.promises.access(file, mode);
    return true;
  } catch (err) {
    // Anything other than "does not exist" means the path is not writable.
    if (err.code !== 'ENOENT') return false;
  }

  // The file is missing: it is writable only if its parent directory is.
  return fs.promises.access(path.dirname(file), mode).then(() => true, () => false);
}
|
||||||
|
/**
 * Update a file's timestamps, creating it (and any missing parent
 * directories) as an empty file when it does not exist.
 *
 * Directories are left untouched.
 *
 * @param {string} file - Path to touch.
 * @returns {Promise<void>}
 */
async function touch(file) {
  // lstat so that a symlink itself counts as "existing"; failures mean "missing".
  const stats = await fs.promises.lstat(file).catch(() => null);

  if (stats) {
    if (stats.isDirectory()) return; // nothing to do

    // One timestamp for both fields so atime and mtime can never disagree.
    const now = new Date();
    return await fs.promises.utimes(file, now, now);
  }

  // Recursive mkdir is a no-op for an existing directory, so no prior
  // existence check is needed.
  await fs.promises.mkdir(path.dirname(file), { recursive: true });
  await fs.promises.writeFile(file, '');
}
|
||||||
|
/**
 * Delete a file, symlink, or directory tree. Missing paths are ignored.
 *
 * @param {string} file - Path to delete.
 * @returns {Promise<void>}
 */
async function remove(file) {
  // lstat so a symlink to a directory is unlinked rather than followed.
  const stats = await fs.promises.lstat(file).catch(() => null);
  if (!stats) return;

  if (stats.isDirectory()) {
    // rmdir's `recursive` option is deprecated (DEP0147); prefer rm() where
    // the runtime provides it and fall back to rmdir on older Node versions.
    if (typeof fs.promises.rm === 'function') {
      return fs.promises.rm(file, { recursive: true, force: true });
    }
    return fs.promises.rmdir(file, { recursive: true });
  }

  return fs.promises.unlink(file);
}
|
||||||
|
/**
 * Serialize `object` as JSON and write it to `file`, followed by a newline.
 *
 * @param {string} file - Destination path.
 * @param {*} object - Value to serialize.
 * @param {object} [options] - `replacer` and `spaces` are passed to
 *   `JSON.stringify`; every other key (default `encoding: 'utf8'`) is passed
 *   to `fs.promises.writeFile`.
 * @returns {Promise<void>}
 */
async function writeJson(file, object, options) {
  const { replacer, spaces, ...writeOptions } = { encoding: 'utf8', ...options };
  const body = `${JSON.stringify(object, replacer, spaces)}\n`;
  await fs.promises.writeFile(file, body, writeOptions);
}
|
||||||
|
const writeJSON = writeJson;
|
||||||
|
/**
 * Read `file` and parse its contents as JSON, ignoring a leading BOM.
 *
 * @param {string} file - Path to read.
 * @param {object} [options] - `reviver` is passed to `JSON.parse`; `quiet`
 *   turns a parse failure into an `undefined` result; every other key
 *   (default `encoding: 'utf8'`) is passed to `fs.promises.readFile`.
 * @returns {Promise<*>} The parsed value, or `undefined` when parsing fails
 *   and `quiet` is set.
 * @throws Read errors always propagate; parse errors propagate unless `quiet`.
 */
async function readJson(file, options) {
  const { reviver, quiet, ...readOptions } = { encoding: 'utf8', ...options };

  // Deliberately outside the try: `quiet` only silences malformed JSON.
  const content = await fs.promises.readFile(file, readOptions);

  try {
    return JSON.parse(stripBom(content), reviver);
  } catch (err) {
    if (quiet) return undefined;
    throw err;
  }
}
|
||||||
|
const readJSON = readJson;
|
||||||
|
|
||||||
|
/**
 * Remove a leading UTF-8 byte-order mark (U+FEFF) from text.
 *
 * @param {string|Buffer} content - Text, or a Buffer which is decoded as UTF-8 first.
 * @returns {string} The text without a leading BOM.
 */
function stripBom(content) {
  const text = Buffer.isBuffer(content) ? content.toString('utf8') : content;
  return text.replace(/^\uFEFF/, '');
}
|
||||||
|
|
||||||
|
exports.exists = exists;
|
||||||
|
exports.isWritable = isWritable;
|
||||||
|
exports.linkStat = linkStat;
|
||||||
|
exports.mkdir = mkdir;
|
||||||
|
exports.pipeline = pipeline;
|
||||||
|
exports.readJSON = readJSON;
|
||||||
|
exports.readJson = readJson;
|
||||||
|
exports.remove = remove;
|
||||||
|
exports.stat = stat;
|
||||||
|
exports.touch = touch;
|
||||||
|
exports.writeJSON = writeJSON;
|
||||||
|
exports.writeJson = writeJson;
|
@ -1,13 +1,20 @@
|
|||||||
/* eslint no-console:0 */
|
/* eslint no-console:0 */
|
||||||
|
|
||||||
const util = require('util');
|
|
||||||
const path = require('path');
|
const path = require('path');
|
||||||
const { URL } = require('url');
|
const { URL } = require('url');
|
||||||
const CloudFrontParser = require('cloudfront-log-parser');
|
const CloudFrontParser = require('cloudfront-log-parser');
|
||||||
const parseUA = require('ua-parser-js');
|
const parseUA = require('ua-parser-js');
|
||||||
const format = require('date-fns/format');
|
const format = require('date-fns/format');
|
||||||
const split = require('split2');
|
const zlib = require('zlib');
|
||||||
var through = require('through2');
|
const { pipeline } = require('./fs');
|
||||||
|
const { Readable, Transform, Writable } = require('stream');
|
||||||
|
const { open: opensql } = require('sqlite');
|
||||||
|
const sqlite3 = require('sqlite3');
|
||||||
|
const sql = require('./sql-tag');
|
||||||
|
|
||||||
|
let fs = require('fs');
|
||||||
|
fs = { ...fs, ...fs.promises };
|
||||||
|
|
||||||
|
|
||||||
function url (input) {
|
function url (input) {
|
||||||
try {
|
try {
|
||||||
@ -18,27 +25,111 @@ function url (input) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function asyncthrough (...args) {
|
// function asyncthrough (...args) {
|
||||||
const [ fn, donefn ] = args;
|
// const [ fn, donefn ] = args;
|
||||||
|
|
||||||
args[0] = function (file, enc, next) {
|
// args[0] = function (file, enc, next) {
|
||||||
fn(this, file, enc).then(() => next(), (err) => { console.error(err, 'Error thrown'); next(err); });
|
// fn(this, file, enc).then(() => next(), (err) => { console.error(err, 'Error thrown'); next(err); });
|
||||||
};
|
// };
|
||||||
|
|
||||||
if (donefn) {
|
// if (donefn) {
|
||||||
args[1] = function (next) {
|
// args[1] = function (next) {
|
||||||
donefn(this).then(() => next(), (err) => { console.error(err, 'Error thrown'); next(err); });
|
// donefn(this).then(() => next(), (err) => { console.error(err, 'Error thrown'); next(err); });
|
||||||
};
|
// };
|
||||||
}
|
// }
|
||||||
|
|
||||||
return through.obj(...args);
|
// return through.obj(...args);
|
||||||
}
|
// }
|
||||||
|
|
||||||
const parser = new CloudFrontParser({ format: 'web' });
|
const parser = new CloudFrontParser({ format: 'web' });
|
||||||
|
|
||||||
process.stdin
|
|
||||||
.pipe(parser)
|
async function* loadFiles () {
|
||||||
.pipe(asyncthrough(async (stream, row) => {
|
const dir = path.resolve(__dirname, 'RAW');
|
||||||
|
for await (const f of await fs.opendir(dir)) {
|
||||||
|
if (!f.isFile()) continue;
|
||||||
|
const fpath = path.resolve(dir, f.name);
|
||||||
|
const file = path.parse(fpath);
|
||||||
|
if (file.ext !== '.gz') continue;
|
||||||
|
// console.log(file);
|
||||||
|
const filestream = fs.createReadStream(fpath).pipe(zlib.createGunzip());
|
||||||
|
for await (const chunk of filestream) {
|
||||||
|
yield chunk;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(async () => {
|
||||||
|
// open the database
|
||||||
|
const db = await opensql({
|
||||||
|
filename: path.resolve(__dirname, 'database.sqlite'),
|
||||||
|
driver: sqlite3.Database,
|
||||||
|
});
|
||||||
|
|
||||||
|
await db.run(sql`
|
||||||
|
CREATE TABLE IF NOT EXISTS records (
|
||||||
|
dts INTEGER,
|
||||||
|
ip TEXT,
|
||||||
|
tid INTEGER,
|
||||||
|
url TEXT,
|
||||||
|
referrer TEXT,
|
||||||
|
referrer_host TEXT,
|
||||||
|
client_start INTEGER,
|
||||||
|
client_end INTEGER,
|
||||||
|
duration INTEGER,
|
||||||
|
language TEXT,
|
||||||
|
scrolled INTEGER,
|
||||||
|
max_scroll INTEGER,
|
||||||
|
page_height INTEGER,
|
||||||
|
viewport_height INTEGER,
|
||||||
|
browser TEXT,
|
||||||
|
browser_version INTEGER,
|
||||||
|
os TEXT,
|
||||||
|
device_type TEXT,
|
||||||
|
device TEXT
|
||||||
|
)
|
||||||
|
`);
|
||||||
|
|
||||||
|
await db.exec(sql`
|
||||||
|
CREATE UNIQUE INDEX IF NOT EXISTS entries ON records (
|
||||||
|
dts,
|
||||||
|
ip,
|
||||||
|
tid
|
||||||
|
);
|
||||||
|
`);
|
||||||
|
|
||||||
|
const stmt = await db.prepare(sql`
|
||||||
|
REPLACE INTO records VALUES (
|
||||||
|
:dts,
|
||||||
|
:ip,
|
||||||
|
:tid,
|
||||||
|
:url,
|
||||||
|
:referrer,
|
||||||
|
:referrer_host,
|
||||||
|
:client_start,
|
||||||
|
:client_end,
|
||||||
|
:duration,
|
||||||
|
:language,
|
||||||
|
:scrolled,
|
||||||
|
:max_scroll,
|
||||||
|
:page_height,
|
||||||
|
:viewport_height,
|
||||||
|
:browser,
|
||||||
|
:browser_version,
|
||||||
|
:os,
|
||||||
|
:device_type,
|
||||||
|
:device
|
||||||
|
);
|
||||||
|
`);
|
||||||
|
|
||||||
|
await pipeline(
|
||||||
|
Readable.from(loadFiles()),
|
||||||
|
parser,
|
||||||
|
new Transform({
|
||||||
|
readableObjectMode: true,
|
||||||
|
writableObjectMode: true,
|
||||||
|
transform (row, encoding, done) {
|
||||||
|
console.log(row);
|
||||||
// filter out OPTIONS calls
|
// filter out OPTIONS calls
|
||||||
if (row['cs-method'] === 'OPTIONS') return null;
|
if (row['cs-method'] === 'OPTIONS') return null;
|
||||||
|
|
||||||
@ -64,18 +155,69 @@ process.stdin
|
|||||||
const sessionEnd = query.end === 'null' ? 0 : Number(query.end);
|
const sessionEnd = query.end === 'null' ? 0 : Number(query.end);
|
||||||
const duration = sessionEnd > sessionStart ? Math.floor((sessionEnd - sessionStart) / 1000) : null;
|
const duration = sessionEnd > sessionStart ? Math.floor((sessionEnd - sessionStart) / 1000) : null;
|
||||||
|
|
||||||
stream.push(JSON.stringify({
|
let {
|
||||||
|
language,
|
||||||
|
viewed,
|
||||||
|
max_scroll,
|
||||||
|
page_height,
|
||||||
|
viewport_height,
|
||||||
|
} = query;
|
||||||
|
|
||||||
|
max_scroll = parseInt(max_scroll, 10) || 0;
|
||||||
|
page_height = parseInt(page_height, 10) || 0;
|
||||||
|
viewport_height = parseInt(viewport_height, 10) || 0;
|
||||||
|
|
||||||
|
const { pathname } = url(row.cs_referer) || {};
|
||||||
|
const { hostname: referrer_host, href: referrer } = url(query.referrer) || {};
|
||||||
|
|
||||||
|
const result = {
|
||||||
dts: `${row.date} ${row.time}`,
|
dts: `${row.date} ${row.time}`,
|
||||||
url: url(row.cs_referer),
|
ip: row.c_ip,
|
||||||
referer: url(query.referer),
|
tid: query.tid !== 'false' ? query.tid : null,
|
||||||
|
url: pathname,
|
||||||
|
referrer,
|
||||||
|
referrer_host,
|
||||||
client_start: format(new Date(sessionStart), 'yyyy-MM-dd HH:mm:ss'),
|
client_start: format(new Date(sessionStart), 'yyyy-MM-dd HH:mm:ss'),
|
||||||
client_end: sessionEnd ? format(new Date(sessionStart), 'yyyy-MM-dd HH:mm:ss') : null,
|
client_end: sessionEnd ? format(new Date(sessionStart), 'yyyy-MM-dd HH:mm:ss') : null,
|
||||||
duration,
|
duration,
|
||||||
useragent,
|
language,
|
||||||
query,
|
viewed,
|
||||||
original: row,
|
max_scroll,
|
||||||
}, null, 2));
|
page_height,
|
||||||
}))
|
viewport_height,
|
||||||
.pipe(process.stdout)
|
browser: useragent.browser.name,
|
||||||
;
|
browser_version: useragent.browser.major,
|
||||||
|
os: useragent.os.name + ' ' + useragent.os.version,
|
||||||
|
device_type: useragent.device && useragent.device.type || null,
|
||||||
|
device: useragent.device && useragent.device.vendor && useragent.device.vendor + ' ' + useragent.device.model || null,
|
||||||
|
};
|
||||||
|
|
||||||
|
this.push(result);
|
||||||
|
done();
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
new Writable({
|
||||||
|
objectMode: true,
|
||||||
|
// highWaterMark: 2,
|
||||||
|
|
||||||
|
write (record, encoding, done) {
|
||||||
|
(async () => {
|
||||||
|
const params = Object.fromEntries(
|
||||||
|
Object.entries(record).map(([ k, v ]) => [ ':' + k, v ]),
|
||||||
|
);
|
||||||
|
await stmt.run(params);
|
||||||
|
process.stdout.write('.');
|
||||||
|
})().then(() => done(), done);
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
await db.close();
|
||||||
|
|
||||||
|
})().then(
|
||||||
|
() => process.exit(),
|
||||||
|
(err) => {
|
||||||
|
console.error(err.stack);
|
||||||
|
process.exit(1);
|
||||||
|
},
|
||||||
|
);
|
||||||
|
1262
analytics/package-lock.json
generated
1262
analytics/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -5,6 +5,7 @@
|
|||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"sync": "aws s3 sync s3://gdbible-analytics/RAW ./RAW",
|
"sync": "aws s3 sync s3://gdbible-analytics/RAW ./RAW",
|
||||||
|
"empty": "aws s3 rm s3://gdbible-analytics/RAW --recursive",
|
||||||
"combine": "find RAW -name '*.gz' -exec cat '{}' ';'| zcat | sed '/^#/ d' > combined.log"
|
"combine": "find RAW -name '*.gz' -exec cat '{}' ';'| zcat | sed '/^#/ d' > combined.log"
|
||||||
},
|
},
|
||||||
"author": "Jocelyn Badgley <joc@twipped.com> (http://twipped.com/)",
|
"author": "Jocelyn Badgley <joc@twipped.com> (http://twipped.com/)",
|
||||||
@ -12,7 +13,12 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"cloudfront-log-parser": "~1.1.0",
|
"cloudfront-log-parser": "~1.1.0",
|
||||||
"date-fns": "~2.9.0",
|
"date-fns": "~2.9.0",
|
||||||
|
"glob-stream": "~6.1.0",
|
||||||
|
"named-placeholders": "~1.1.2",
|
||||||
"split2": "~3.2.2",
|
"split2": "~3.2.2",
|
||||||
|
"sqlite": "~4.0.19",
|
||||||
|
"sqlite3": "~5.0.2",
|
||||||
|
"stream-chain": "~2.2.4",
|
||||||
"through2": "~4.0.2",
|
"through2": "~4.0.2",
|
||||||
"ua-parser-js": "~0.7.21"
|
"ua-parser-js": "~0.7.21"
|
||||||
},
|
},
|
||||||
|
13
analytics/queries.sql
Normal file
13
analytics/queries.sql
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
SELECT
|
||||||
|
date(dts) as day,
|
||||||
|
count(DISTINCT IFNULL(tid, ip)) as tids
|
||||||
|
FROM records
|
||||||
|
GROUP BY date(dts);
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
(duration / 60) as minutes,
|
||||||
|
COUNT(IFNULL(tid,ip)) as total
|
||||||
|
FROM records
|
||||||
|
WHERE duration > 1 AND duration < (60 * 30)
|
||||||
|
GROUP BY duration / 60
|
||||||
|
HAVING total > 5;
|
48
analytics/sql-tag.js
Normal file
48
analytics/sql-tag.js
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
|
||||||
|
const namedParams = require('named-placeholders')();
|
||||||
|
|
||||||
|
/**
 * Remove the common leading indentation from every line of a string.
 *
 * The indent is the shortest run of leading whitespace found on any line
 * that contains non-whitespace. Arrays are processed element by element and
 * the results are concatenated.
 *
 * @param {string|string[]} input - Text (or chunks of text) to de-indent.
 * @returns {string} The de-indented text.
 */
function stripIndent (input) {
  if (Array.isArray(input)) return input.map(stripIndent).join('');

  // Leading whitespace of each line that has content; null when there is none.
  const leading = input.match(/^[^\S\n]*(?=\S)/gm);
  if (!leading) return input;

  // Reduce instead of Math.min(...spread): spreading a very large array of
  // lines can exceed the engine's argument limit and throw a RangeError.
  const indent = leading.reduce((min, ws) => Math.min(min, ws.length), Infinity);
  if (!indent) return input;

  return input.replace(new RegExp(`^.{${indent}}`, 'gm'), '');
}
|
||||||
|
|
||||||
|
/**
 * Test whether a value is a non-null, non-array object.
 *
 * @param {*} input - Value to test.
 * @param {boolean} [strict=false] - When true, additionally require that the
 *   value is an `Object` instance whose `constructor` is `Object` itself,
 *   which excludes class instances and prototype-less objects.
 * @returns {boolean}
 */
function isObject (input, strict = false) {
  const objectLike = Boolean(input) && typeof input === 'object' && !Array.isArray(input);
  if (!objectLike) return false;
  if (!strict) return true;

  return input instanceof Object && input.constructor === Object.prototype.constructor;
}
|
||||||
|
/** True for every value except `null` and `undefined`. */
const isNotUndefinedOrNull = (input) => input !== null && typeof input !== 'undefined';

/** Pass a value through unchanged, mapping `null`/`undefined` to an empty string. */
const valueOrEmpty = (input) => (isNotUndefinedOrNull(input) ? input : '');
|
||||||
|
|
||||||
|
/**
 * Build a template tag bound to a set of named parameters.
 *
 * The returned tag assembles the query text with `sql` and hands it, together
 * with `data`, to the `named-placeholders` compiler.
 *
 * @param {object} data - Values for the named placeholders in the query.
 * @returns {Function} Template tag returning the result of `namedParams(query, data)`.
 */
function withData (data) {
  return (...args) => namedParams(sql(...args), data);
}
|
||||||
|
|
||||||
|
/**
 * Template tag that assembles a query string and strips its common indentation.
 *
 * NOTE(review): interpolated values are concatenated into the text verbatim
 * (null/undefined become ''), with no escaping. Only interpolate trusted
 * fragments; pass untrusted data as bound statement parameters instead.
 *
 * @param {string[]} strings - Literal chunks of the template.
 * @param {...*} values - Interpolated values.
 * @returns {string} The assembled, de-indented query text.
 */
function sql (strings, ...values) {
  let text = '';
  strings.forEach((chunk, i) => {
    text = text + chunk + valueOrEmpty(values[i]);
  });
  return stripIndent(text);
}
|
||||||
|
|
||||||
|
module.exports = exports = (...args) => {
|
||||||
|
if (args.length === 0 || (args.length === 1 && isObject(args[0]))) {
|
||||||
|
return withData(args[0] || {});
|
||||||
|
}
|
||||||
|
if (Array.isArray(args[0])) return sql(...args);
|
||||||
|
throw new TypeError('Unknown invocation of sql-tag');
|
||||||
|
};
|
3
js/_i.js
3
js/_i.js
@ -44,10 +44,11 @@
|
|||||||
|
|
||||||
const viewport_height = Math.max(window.document.documentElement.clientHeight, window.innerHeight || 0);
|
const viewport_height = Math.max(window.document.documentElement.clientHeight, window.innerHeight || 0);
|
||||||
const max_scroll = Math.max(SESSION_DATA.max_scroll, window.scrollY);
|
const max_scroll = Math.max(SESSION_DATA.max_scroll, window.scrollY);
|
||||||
|
const density = window.devicePixelRatio || 1;
|
||||||
|
|
||||||
const viewed = max_scroll === 0 ? 0 : Math.round(((max_scroll + viewport_height) / page_height) * 100);
|
const viewed = max_scroll === 0 ? 0 : Math.round(((max_scroll + viewport_height) / page_height) * 100);
|
||||||
|
|
||||||
Object.assign(SESSION_DATA, { page_height, viewport_height, max_scroll, viewed });
|
Object.assign(SESSION_DATA, { page_height, viewport_height, max_scroll, viewed, density });
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user