mirror of
https://github.com/GenderDysphoria/GenderDysphoria.fyi.git
synced 2025-01-30 23:06:18 +00:00
More analytics work
This commit is contained in:
parent
1cc9c88a3a
commit
514acab9ee
10
.eslintrc
10
.eslintrc
@ -2,11 +2,11 @@
|
||||
"extends": "twipped/node",
|
||||
"rules": {
|
||||
"node/no-unpublished-require": 0,
|
||||
'indent': [ 2, 2, {
|
||||
'MemberExpression': 1,
|
||||
"indent": [ 2, 2, {
|
||||
"MemberExpression": 1
|
||||
} ],
|
||||
'node/no-unsupported-features/es-syntax': [ 'error' ],
|
||||
'node/no-unsupported-features/es-builtins': [ 'error' ],
|
||||
'node/no-unsupported-features/node-builtins': [ 'error' ],
|
||||
"node/no-unsupported-features/es-syntax": [ "error" ],
|
||||
"node/no-unsupported-features/es-builtins": [ "error" ],
|
||||
"node/no-unsupported-features/node-builtins": [ "error" ]
|
||||
}
|
||||
}
|
||||
|
@ -129,15 +129,14 @@ async function* loadFiles () {
|
||||
readableObjectMode: true,
|
||||
writableObjectMode: true,
|
||||
transform (row, encoding, done) {
|
||||
console.log(row);
|
||||
// filter out OPTIONS calls
|
||||
if (row['cs-method'] === 'OPTIONS') return null;
|
||||
if (row['cs-method'] === 'OPTIONS') return done();
|
||||
|
||||
// I only care about the pixel hits, nothing else.
|
||||
if (row['cs-uri-stem'] !== '/i') return null;
|
||||
if (row['cs-uri-stem'] !== '/i') return done();
|
||||
|
||||
// this isn't an analytics event
|
||||
if (row['cs-referer'] === '-') return null;
|
||||
if (row['cs-referer'] === '-') return done();
|
||||
|
||||
row = Object.fromEntries(Object.entries(row).map(([ k, v ]) => [ k.replace(/-/g, '_'), v ]));
|
||||
|
||||
@ -147,7 +146,7 @@ async function* loadFiles () {
|
||||
;
|
||||
|
||||
// we didn't get analytics data from this load, ignore it
|
||||
if (!query.start) return null;
|
||||
if (!query.start) return done();
|
||||
|
||||
const useragent = parseUA(row.cs_user_agent);
|
||||
|
||||
|
@ -5,7 +5,7 @@ data "aws_region" "current" {}
|
||||
resource "aws_cloudwatch_log_group" "ipixel_results" {
|
||||
name = "/aws/ipixel/${var.site}"
|
||||
|
||||
retention_in_days = 30
|
||||
retention_in_days = 90
|
||||
|
||||
tags = {
|
||||
Site = var.site,
|
||||
|
@ -1,3 +1,12 @@
|
||||
{
|
||||
"extends": "airbnb-base"
|
||||
"extends": "twipped/node",
|
||||
"rules": {
|
||||
"node/no-unpublished-require": 0,
|
||||
"indent": [ 2, 2, {
|
||||
"MemberExpression": 1
|
||||
} ],
|
||||
"node/no-unsupported-features/es-syntax": [ "error" ],
|
||||
"node/no-unsupported-features/es-builtins": [ "error" ],
|
||||
"node/no-unsupported-features/node-builtins": [ "error" ]
|
||||
}
|
||||
}
|
||||
|
2
terraform/lambda/index.js
Normal file
2
terraform/lambda/index.js
Normal file
@ -0,0 +1,2 @@
|
||||
|
||||
// Root-level entry point: re-exports everything that ./src/index exports, so
// the same exports are reachable from a module named `index` at the package root.
// NOTE(review): presumably this exists because the Lambda archive is built from
// the package root and the handler is configured as "index.handler" — confirm
// against the Terraform config.
module.exports = exports = require('./src/index');
|
10
terraform/lambda/package-lock.json
generated
10
terraform/lambda/package-lock.json
generated
@ -305,6 +305,11 @@
|
||||
"which": "^2.0.1"
|
||||
}
|
||||
},
|
||||
"date-fns": {
|
||||
"version": "2.18.0",
|
||||
"resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.18.0.tgz",
|
||||
"integrity": "sha512-NYyAg4wRmGVU4miKq5ivRACOODdZRY3q5WLmOJSq8djyzftYphU7dTHLcEtLqEvfqMKQ0jVv91P4BAwIjsXIcw=="
|
||||
},
|
||||
"debug": {
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz",
|
||||
@ -1547,6 +1552,11 @@
|
||||
"integrity": "sha512-4dbzIzqvjtgiM5rw1k5rEHtBANKmdudhGyBEajN01fEyhaAIhsoKNy6y7+IN93IfpFtwY9iqi7kD+xwKhQsNJA==",
|
||||
"dev": true
|
||||
},
|
||||
"ua-parser-js": {
|
||||
"version": "0.7.24",
|
||||
"resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-0.7.24.tgz",
|
||||
"integrity": "sha512-yo+miGzQx5gakzVK3QFfN0/L9uVhosXBBO7qmnk7c2iw1IhL212wfA3zbnI54B0obGwC/5NWub/iT9sReMx+Fw=="
|
||||
},
|
||||
"uri-js": {
|
||||
"version": "4.4.1",
|
||||
"resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz",
|
||||
|
@ -2,7 +2,9 @@
|
||||
"name": "cloudfront-logs",
|
||||
"version": "0.0.1",
|
||||
"dependencies": {
|
||||
"aws-sdk": "*"
|
||||
"aws-sdk": "*",
|
||||
"date-fns": "~2.18.0",
|
||||
"ua-parser-js": "~0.7.24"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "*",
|
||||
@ -11,5 +13,8 @@
|
||||
},
|
||||
"scripts": {
|
||||
"lint": "eslint ."
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.14.0"
|
||||
}
|
||||
}
|
||||
|
@ -2,9 +2,20 @@ const { gunzip } = require('zlib');
|
||||
const { promisify } = require('util');
|
||||
const { S3 } = require('aws-sdk');
|
||||
const { unescape } = require('querystring');
|
||||
const parseUA = require('ua-parser-js');
|
||||
const format = require('date-fns/format');
|
||||
const { URL } = require('url');
|
||||
|
||||
const gunzipAsync = promisify(gunzip);
|
||||
|
||||
/**
 * Parse a URL string into a plain object snapshot of its components.
 *
 * @param {string} input - Absolute URL to parse.
 * @returns {?Object} Object carrying hash, host, hostname, href, origin,
 *   password, pathname, port, protocol, search, searchParams and username,
 *   or null when `input` cannot be parsed as a URL.
 */
function url (input) {
  let parsed;
  try {
    parsed = new URL(input);
  } catch (err) {
    // Unparseable input is an expected case here; callers receive null.
    return null;
  }

  // Copy the accessor values onto a plain object, in this fixed key order.
  const components = [
    'hash',
    'host',
    'hostname',
    'href',
    'origin',
    'password',
    'pathname',
    'port',
    'protocol',
    'search',
    'searchParams',
    'username',
  ];

  const snapshot = {};
  for (const name of components) {
    snapshot[name] = parsed[name];
  }
  return snapshot;
}
|
||||
|
||||
// Parsing the line containing the version.
|
||||
//
|
||||
@ -31,7 +42,7 @@ const parseFields = (line) => {
|
||||
if (!line.startsWith('#Fields:')) {
|
||||
throw new Error(`Invalid fields line '${line}'`);
|
||||
} else {
|
||||
return line.match(/[\w()-]+(\s|$)/g).map(field => (
|
||||
return line.match(/[\w()-]+(\s|$)/g).map((field) => (
|
||||
// Strip parentheses and remove unnecessary abbreviations in field names
|
||||
field.replace(/\(([^)]+)\)/, '-$1').replace(/^(c-|cs-|sc-)/, '').trim().toLowerCase()
|
||||
));
|
||||
@ -41,7 +52,7 @@ const parseFields = (line) => {
|
||||
// Unescape value twice (because fuck you that's why).
|
||||
// https://forums.aws.amazon.com/thread.jspa?threadID=134017
|
||||
//
|
||||
const decode = value => unescape(unescape(value));
|
||||
const decode = (value) => unescape(unescape(value));
|
||||
|
||||
// Split up line and assign to corresponding field.
|
||||
//
|
||||
@ -49,11 +60,76 @@ const parseLine = (line, fields) => {
|
||||
if (line.startsWith('#')) {
|
||||
throw new Error(`Invalid log line '${line}'`);
|
||||
} else {
|
||||
return line.split('\t').reduce((object, section, index) => {
|
||||
let row = line.split('\t').reduce((object, section, index) => {
|
||||
const result = object;
|
||||
if (section !== '-') result[fields[index]] = decode(section); // Skip missing fields
|
||||
return result;
|
||||
}, {});
|
||||
|
||||
|
||||
// filter out OPTIONS calls
|
||||
if (row.method === 'OPTIONS') return;
|
||||
|
||||
// I only care about the pixel hits, nothing else.
|
||||
if (row['uri-stem'] !== '/i') return;
|
||||
|
||||
// this isn't an analytics event
|
||||
if (!row.referer) return;
|
||||
|
||||
row = Object.fromEntries(Object.entries(row).map(([ k, v ]) => [ k.replace(/-/g, '_'), v ]));
|
||||
|
||||
const query = (row.uri_query)
|
||||
? Object.fromEntries(new URLSearchParams(row.uri_query))
|
||||
: {}
|
||||
;
|
||||
|
||||
const useragent = parseUA(row.user_agent);
|
||||
|
||||
const sessionStart = Number(query.start);
|
||||
const sessionEnd = query.end === 'null' ? 0 : Number(query.end);
|
||||
const duration = sessionEnd > sessionStart ? Math.floor((sessionEnd - sessionStart) / 1000) : null;
|
||||
|
||||
let {
|
||||
language,
|
||||
viewed,
|
||||
max_scroll,
|
||||
page_height,
|
||||
viewport_height,
|
||||
} = query;
|
||||
|
||||
max_scroll = parseInt(max_scroll, 10) || 0;
|
||||
page_height = parseInt(page_height, 10) || 0;
|
||||
viewport_height = parseInt(viewport_height, 10) || 0;
|
||||
|
||||
const { pathname } = url(row.referer) || {};
|
||||
const { hostname: referrer_host, href: referrer } = url(query.referrer) || {};
|
||||
|
||||
// Flattened analytics record for one pixel hit, assembled from the parsed log
// row, the decoded query string and the parsed user agent.
const result = {
  // Log timestamp, taken from the row's date and time columns.
  dts: `${row.date} ${row.time}`,
  ip: row.ip,
  // A tid equal to the literal string 'false' is stored as null.
  tid: query.tid !== 'false' ? query.tid : null,
  // Path of the page that loaded the pixel (from the request's referer header).
  url: pathname,
  // Referrer as reported by the client in the query string.
  referrer,
  referrer_host,
  // NOTE(review): format() is given new Date(sessionStart) unconditionally;
  // confirm query.start is always numeric here, otherwise the Date is invalid.
  client_start: format(new Date(sessionStart), 'yyyy-MM-dd HH:mm:ss'),
  // Format the session END time. This previously re-formatted sessionStart,
  // which made client_end always identical to client_start.
  client_end: sessionEnd ? format(new Date(sessionEnd), 'yyyy-MM-dd HH:mm:ss') : null,
  // Whole seconds between start and end, or null when end is not after start.
  duration,
  language,
  viewed,
  max_scroll,
  page_height,
  viewport_height,
  browser: useragent.browser.name,
  browser_version: useragent.browser.major,
  os: useragent.os.name + ' ' + useragent.os.version,
  device_type: useragent.device && useragent.device.type || null,
  // "<vendor> <model>" only when a vendor was detected; otherwise null.
  device: useragent.device && useragent.device.vendor && useragent.device.vendor + ' ' + useragent.device.model || null,
  // Raw inputs kept alongside the derived fields.
  useragent,
  query,
  original: row,
};
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
@ -79,5 +155,9 @@ exports.parseLogFile = async ({ bucket, key, region }) => {
|
||||
// Shift next line containing fields format and parse it for validation
|
||||
const fields = parseFields(lines.shift());
|
||||
|
||||
return lines.map(line => parseLine(line, fields));
|
||||
console.log(`Found ${lines.length} rows to parse`); // eslint-disable-line no-console
|
||||
const rows = lines.map((line) => parseLine(line, fields)).filter(Boolean);
|
||||
console.log(`Produced ${rows.length} results`);
|
||||
console.log('Sample', rows[0]);
|
||||
return rows;
|
||||
};
|
||||
|
@ -15,7 +15,7 @@ const groupBy = (array, key) => (
|
||||
if (result[item[key]]) {
|
||||
result[item[key]].push(item);
|
||||
} else if (item[key]) {
|
||||
result[item[key]] = [item];
|
||||
result[item[key]] = [ item ];
|
||||
}
|
||||
return result;
|
||||
}, {})
|
||||
@ -54,7 +54,7 @@ const describeLogStream = async (logStreamName) => {
|
||||
// Extend the original record with some additional fields
|
||||
// and encapsulate records into CloudWatch Logs events.
|
||||
//
|
||||
const buildlogEvents = records => (
|
||||
const buildlogEvents = (records) => (
|
||||
records.map((record) => {
|
||||
const payload = record;
|
||||
payload.name = 'logs:cloudfront';
|
||||
|
@ -31,7 +31,7 @@ resource "aws_s3_bucket_notification" "ipixel_logs" {
|
||||
|
||||
data "archive_file" "ipixel_parser" {
|
||||
type = "zip"
|
||||
source_dir = "${path.module}/lambda/src"
|
||||
source_dir = "${path.module}/lambda"
|
||||
output_path = ".terraform/tmp/lambda/ipixel_parser.zip"
|
||||
}
|
||||
|
||||
@ -40,8 +40,7 @@ resource "aws_lambda_function" "ipixel_parser" {
|
||||
|
||||
runtime = "nodejs12.x"
|
||||
handler = "index.handler"
|
||||
timeout = "24"
|
||||
memory_size = "512"
|
||||
timeout = 5
|
||||
reserved_concurrent_executions = 3
|
||||
|
||||
environment {
|
||||
@ -60,5 +59,8 @@ resource "aws_lambda_function" "ipixel_parser" {
|
||||
Role = "ipixel"
|
||||
}
|
||||
|
||||
depends_on = [aws_cloudwatch_log_group.ipixel_parser_logs]
|
||||
depends_on = [
|
||||
aws_cloudwatch_log_group.ipixel_parser_logs,
|
||||
aws_cloudwatch_log_group.ipixel_results,
|
||||
]
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user