diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl new file mode 100644 index 0000000..998e6bc --- /dev/null +++ b/terraform/.terraform.lock.hcl @@ -0,0 +1,38 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/hashicorp/archive" { + version = "2.1.0" + hashes = [ + "h1:Rjd4bHMA69V+16tiriAUTW8vvqoljzNLmEaRBCgzpUs=", + "zh:033279ecbf60f565303222e9a6d26b50fdebe43aa1c6e8f565f09bb64d67c3fd", + "zh:0af998e42eb421c92e87202df5bfee436b3cfe553214394f08d786c72a9e3f70", + "zh:1183b661c692f38409a61eefb5d412167c246fcd9e49d4d61d6d910012d206ba", + "zh:5febb66f4a8207117f71dcd460fb9c81d3afb7b600b5e598cf517cf6e27cf4b2", + "zh:66135ce46d29d0ccf0e3b6a119423754ca334dbf4266bc989cce5b0b667b5fde", + "zh:6b9dc1a4f0a680bb650a7191784927f99675a8c8dd3c155ba821185f630db604", + "zh:91e249482c016ecf6bf8b83849964005cd2d0b4396688419cd1752809b46b23e", + "zh:a6a2e5f2f010c511e66174cb84ea18899e8bcfc1354c4b9fed972fdb131ffffc", + "zh:bb1f6abc76552a883732caff897ff7b07a91977a9b4bb97915f6aac54116bb65", + "zh:f05a9a63607f85719fde705f58d82ee16fa67f9158a5c3424c0216507631eddf", + "zh:fc603a05a06814387ffa4a054d1baee8ea6b5ab32c53cb73e90a5bf9a2616777", + ] +} + +provider "registry.terraform.io/hashicorp/aws" { + version = "3.30.0" + hashes = [ + "h1:PmKa3uxO2mDA5FJfGmpX+4e0x70vFLV5Ka9NxkuMpUo=", + "zh:01f562a6a31fe46a8ca74804f360e3452b26f71abc549ce1f0ab5a8af2484cdf", + "zh:25bacc5ed725051f0ab1f7d575e45c901e5b8e1d50da4156a31dda92b2b7e481", + "zh:349b79979d9169db614d8ebd1bc2e0caeb7a38dc816e261b8b2b4b5204615519", + "zh:5e41446acc54c6fc15e82c3fa14b72174b30eba81e0711ede297e5620c55a628", + "zh:68ad98f6d612bdc35a65d48950abc8e75c69decb49db28258ce8eeb5458586b7", + "zh:704603d65e8bac17d203b57c2db142c3134a91076e1b4a31c40f75eb3257dde8", + "zh:a362c700032b2db047d16007d52f28b3f216d32671b6b355d23bdaa082c66a4b", + "zh:bd197797b41268de3c93cad02b7c655dc0c4d8661abb37544ca049e6b1eccae6", + "zh:deb12ef0e3396a71d485977ddc14b695775f7937097ebf2b2f53ed348a4365e7", + "zh:ec8a7d0f02738f290107d39bf401d68ddce82a95cd9d998003f7e04b3a196411", + "zh:ffcc43b6c5e7f26c55e2a8c539d7370fca8042722400a3e06bdce4240bd7088a", + ] +} diff --git a/terraform/cert.tf b/terraform/cert.tf index a7bc915..2c2653f 100644 --- a/terraform/cert.tf +++ b/terraform/cert.tf @@ -23,16 +23,23 @@ resource "aws_acm_certificate" "cert" { } resource "aws_route53_record" "cert_validation" { - count = length(aws_acm_certificate.cert.subject_alternative_names) + 1 - zone_id = aws_route53_zone.zone.id - ttl = 60 + for_each = { + for dvo in aws_acm_certificate.cert.domain_validation_options : dvo.domain_name => { + name = dvo.resource_record_name + record = dvo.resource_record_value + type = dvo.resource_record_type + } + } - name = aws_acm_certificate.cert.domain_validation_options[count.index].resource_record_name - type = aws_acm_certificate.cert.domain_validation_options[count.index].resource_record_type - records = [aws_acm_certificate.cert.domain_validation_options[count.index].resource_record_value] + allow_overwrite = true + name = each.value.name + records = [each.value.record] + ttl = 60 + type = each.value.type + zone_id = aws_route53_zone.zone.id } resource "aws_acm_certificate_validation" "cert" { certificate_arn = aws_acm_certificate.cert.arn - validation_record_fqdns = aws_route53_record.cert_validation[*].fqdn + validation_record_fqdns = [for record in aws_route53_record.cert_validation : record.fqdn] } diff --git a/terraform/cloudwatch-logs.tf b/terraform/cloudwatch-logs.tf new file mode 100644 index 0000000..5812c1a --- /dev/null +++ b/terraform/cloudwatch-logs.tf @@ -0,0 +1,49 @@ +data "aws_caller_identity" "current" {} + +data "aws_region" "current" {} + +resource "aws_cloudwatch_log_group" "ipixel_results" { + name = "/aws/ipixel/${var.site}" + + retention_in_days = 30 + + tags = { + Site = var.site, + Role = "ipixel" + } +} + +data "aws_iam_policy_document" "logs_cloudwatch_log_group" { + statement { + actions = ["logs:DescribeLogStreams"] + resources = ["arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*"] + } + + statement { + actions = ["logs:CreateLogStream", "logs:PutLogEvents"] + resources = ["${aws_cloudwatch_log_group.ipixel_results.arn}:*"] + } +} + +resource "aws_cloudwatch_log_group" "ipixel_parser_logs" { + name = "/aws/ipixel_parser/${var.site}" + + retention_in_days = 3 + + tags = { + Site = var.site, + Role = "ipixel" + } +} + +data "aws_iam_policy_document" "ipixel_parser_cloudwatch_log_group" { + statement { + actions = ["logs:DescribeLogStreams"] + resources = ["arn:aws:logs:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:*"] + } + + statement { + actions = ["logs:CreateLogStream", "logs:PutLogEvents"] + resources = ["${aws_cloudwatch_log_group.ipixel_parser_logs.arn}:*"] + } +} diff --git a/terraform/lambda.tf b/terraform/lambda.tf index 01a38f3..94a751b 100644 --- a/terraform/lambda.tf +++ b/terraform/lambda.tf @@ -33,67 +33,70 @@ EOF # ----------------------------------------------------------------------------------------------------------- # IAM Role for Log Parsing Lambda -resource "aws_iam_role" "lambda" { - name = "${var.site}-lambda-role" - assume_role_policy = <=0.6.0", + "xmlbuilder": "~9.0.1" + } + }, + "xmlbuilder": { + "version": "9.0.7", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-9.0.7.tgz", + "integrity": "sha1-Ey7mPS7FVlxVfiD0wi35rKaGsQ0=" + }, + "yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "dev": true + } + } +} diff --git a/terraform/lambda/package.json b/terraform/lambda/package.json new file mode 100644 index 0000000..6de2e7a --- /dev/null +++ b/terraform/lambda/package.json @@ -0,0 +1,15 @@ +{ + "name": "cloudfront-logs", + "version": "0.0.1", + "dependencies": { + "aws-sdk": "*" + }, + "devDependencies": { + "eslint": "*", + "eslint-config-airbnb-base": "*", + "eslint-plugin-import": "*" + }, + "scripts": { + "lint": "eslint ." + } +} diff --git a/terraform/lambda/src/cloudfront.js b/terraform/lambda/src/cloudfront.js new file mode 100644 index 0000000..816223b --- /dev/null +++ b/terraform/lambda/src/cloudfront.js @@ -0,0 +1,83 @@ +const { gunzip } = require('zlib'); +const { promisify } = require('util'); +const { S3 } = require('aws-sdk'); +const { unescape } = require('querystring'); + +const gunzipAsync = promisify(gunzip); + + +// Parsing the line containing the version. +// +// Format: +// +// #Version: 1.0 +// +const parseVersion = (line) => { + if (!line.startsWith('#Version:')) { + throw new Error(`Invalid version line '${line}'`); + } else { + return line.match(/[\d.]+$/); + } +}; + +// Parsing the line containinge the fields format and use kebab case. +// https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#LogFileFormat +// +// Format: +// eslint-disable-next-line max-len +// #Fields: date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields +// +const parseFields = (line) => { + if (!line.startsWith('#Fields:')) { + throw new Error(`Invalid fields line '${line}'`); + } else { + return line.match(/[\w()-]+(\s|$)/g).map(field => ( + // Strip parentheses and remove unecessary abbreviations in field names + field.replace(/\(([^)]+)\)/, '-$1').replace(/^(c-|cs-|sc-)/, '').trim().toLowerCase() + )); + } +}; + +// Unescape value twice (because fuck you that's why). +// https://forums.aws.amazon.com/thread.jspa?threadID=134017 +// +const decode = value => unescape(unescape(value)); + +// Split up line and assign to corresponding field. +// +const parseLine = (line, fields) => { + if (line.startsWith('#')) { + throw new Error(`Invalid log line '${line}'`); + } else { + return line.split('\t').reduce((object, section, index) => { + const result = object; + if (section !== '-') result[fields[index]] = decode(section); // Skip missing fields + return result; + }, {}); + } +}; + +// Get log file from S3 and unzip it. +// +const getLogFile = async ({ bucket, key, region }) => { + const s3 = new S3({ region }); + + const zippedObject = await s3.getObject({ Bucket: bucket, Key: key }).promise(); + const logFile = await gunzipAsync(zippedObject.Body); + return logFile.toString().trim(); +}; + +// Parse log file and return a list of log events. +// +exports.parseLogFile = async ({ bucket, key, region }) => { + const file = await getLogFile({ bucket, key, region }); + + const lines = file.split('\n'); + + // Shift first line which contains the version and parse it for validation + parseVersion(lines.shift()); + // Shift next line containing fields format and parse it for validation + const fields = parseFields(lines.shift()); + + return lines.map(line => parseLine(line, fields)); +}; diff --git a/terraform/lambda/src/cloudwatch-logs.js b/terraform/lambda/src/cloudwatch-logs.js new file mode 100644 index 0000000..c8a8945 --- /dev/null +++ b/terraform/lambda/src/cloudwatch-logs.js @@ -0,0 +1,88 @@ +const { CloudWatchLogs } = require('aws-sdk'); + +// Split up ARN like "arn:aws:logs:eu-west-1:123456789012:log-group:example-group:*" +const [,,, region,,, logGroupName] = process.env.CLOUDWATCH_LOGS_GROUP_ARN.split(':'); + +const cloudwatchlogs = new CloudWatchLogs({ region }); + + +// Group array of hashes by defined key. +// +const groupBy = (array, key) => ( + array.reduce((object, item) => { + const result = object; + + if (result[item[key]]) { + result[item[key]].push(item); + } else if (item[key]) { + result[item[key]] = [item]; + } + return result; + }, {}) +); + +// Find log stream by prefix. +// +const findLogStream = async (logStreamNamePrefix) => { + const params = { logGroupName, logStreamNamePrefix }; + + const { logStreams } = await cloudwatchlogs.describeLogStreams(params).promise(); + + if (logStreams.length > 1) { + throw new Error(`Found '${logStreams.length}' matching CloudWatch Logs streams but expected only one.`); + } + + return logStreams[0]; +}; + +// Get log stream or creting it if not present yet. +// +// Name format: +// 2000-01-01 +// +const describeLogStream = async (logStreamName) => { + let logStream = await findLogStream(logStreamName); + + if (!logStream) { + await cloudwatchlogs.createLogStream({ logGroupName, logStreamName }).promise(); + logStream = await findLogStream(logStreamName); + } + + return logStream; +}; + +// Extend the original record with some additional fields +// and encapsule records into CloudWatch Logs event. +// +const buildlogEvents = records => ( + records.map((record) => { + const payload = record; + payload.name = 'logs:cloudfront'; + + return { + message: JSON.stringify(payload), + timestamp: new Date(`${payload.date} ${payload.time} UTC`).getTime(), + }; + }).sort((a, b) => a.timestamp - b.timestamp) // Events in a request must be chronological ordered +); + +// Send the given documents to CloudWatch Logs group. +// +exports.putLogEvents = async (records) => { + const groupedRecords = groupBy(records, 'date'); + + const putLogEventsCalls = Object.keys(groupedRecords).map(async (key) => { + const logStream = await describeLogStream(key); + + const params = { + logEvents: buildlogEvents(groupedRecords[key]), + logGroupName, + logStreamName: logStream.logStreamName, + sequenceToken: logStream.uploadSequenceToken, + }; + + return cloudwatchlogs.putLogEvents(params).promise(); + }); + + return Promise.all(putLogEventsCalls); +}; diff --git a/terraform/lambda/src/index.js b/terraform/lambda/src/index.js new file mode 100644 index 0000000..871303f --- /dev/null +++ b/terraform/lambda/src/index.js @@ -0,0 +1,18 @@ +const { parseLogFile } = require('./cloudfront'); +const { putLogEvents } = require('./cloudwatch-logs'); + +// Lambda handler. +// +exports.handler = async (event) => { + if (event.Records.length !== 1) { + throw new Error(`Wrong length of events.Records, expected: '1', got: '${event.Records.length}'`); + } else { + const params = { + bucket: event.Records[0].s3.bucket.name, + key: decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' ')), + region: event.Records[0].awsRegion, + }; + + return putLogEvents(await parseLogFile(params)); + } +}; diff --git a/terraform/logging.tf b/terraform/logging.tf index 17c70f0..7279aa0 100644 --- a/terraform/logging.tf +++ b/terraform/logging.tf @@ -3,79 +3,59 @@ # ----------------------------------------------------------------------------------------------------------- # Grant the log parsing lambda access to the logs bucket -# resource "aws_lambda_permission" "allow_bucket" { -# statement_id = "AllowExecutionFromS3Bucket" -# action = "lambda:InvokeFunction" -# function_name = aws_lambda_function.logs_parser.arn -# principal = "s3.amazonaws.com" -# source_arn = aws_s3_bucket.logs.arn -# } +resource "aws_lambda_permission" "allow_bucket" { + statement_id = "AllowExecutionFromS3Bucket" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.ipixel_parser.arn + principal = "s3.amazonaws.com" + source_arn = aws_s3_bucket.ipixel_logs.arn +} # ----------------------------------------------------------------------------------------------------------- # Log Parsing Lambda -# data "archive_file" "logs_parser" { -# type = "zip" -# source_dir = "${path.module}/files/decorate" -# output_path = "${path.module}/files/decorate.zip" -# } -# resource "aws_lambda_function" "logs_parser" { -# filename = data.archive_file.logs_parser.output_path -# function_name = "${var.site}-logs-decorator" -# handler = "index.handler" -# source_code_hash = data.archive_file.logs_parser.output_base64sha256 -# runtime = "nodejs12.x" -# memory_size = "128" -# timeout = "5" -# role = aws_iam_role.lambda.arn +resource "aws_s3_bucket_notification" "ipixel_logs" { + bucket = aws_s3_bucket.ipixel_logs.bucket -# tags = { -# Name = "${var.site}-log-dist" -# Site = var.site -# } -# } + lambda_function { + lambda_function_arn = aws_lambda_function.ipixel_parser.arn + events = ["s3:ObjectCreated:*"] + } -# resource "aws_s3_bucket_notification" "bucket_notification" { -# bucket = aws_s3_bucket.logs.id + depends_on = [aws_lambda_permission.s3_bucket_invoke_function] +} -# lambda_function { -# lambda_function_arn = aws_lambda_function.logs_parser.arn -# events = ["s3:ObjectCreated:*"] -# filter_prefix = "RAW/" -# filter_suffix = ".gz" -# } -# } +data "archive_file" "ipixel_parser" { + type = "zip" + source_dir = "${path.module}/lambda/src" + output_path = ".terraform/tmp/lambda/ipixel_parser.zip" +} -# Reduce log retention to two weeks -# resource "aws_cloudwatch_log_group" "logs_parser" { -# name = "/aws/lambda/${aws_lambda_function.logs_parser.function_name}" -# retention_in_days = 14 -# } +resource "aws_lambda_function" "ipixel_parser" { + function_name = "ipixel-parser-${var.site}" + runtime = "nodejs12.x" + handler = "index.handler" + timeout = 5 + reserved_concurrent_executions = 3 -# ----------------------------------------------------------------------------------------------------------- -# Athena Configuration + environment { + variables = { + CLOUDWATCH_LOGS_GROUP_ARN = aws_cloudwatch_log_group.ipixel_results.arn + } + } -# resource "aws_s3_bucket" "athena" { -# bucket = "${var.site}-athena" -# acl = "private" -# tags = { -# Name = "${var.site}-athena" -# Site = var.site -# } -# } + role = aws_iam_role.ipixel_parser.arn -# resource "aws_athena_workgroup" "wg" { -# name = "${var.site}-wg" -# tags = { -# Name = "${var.site}-wg" -# Site = var.site -# } -# } + filename = data.archive_file.ipixel_parser.output_path + source_code_hash = data.archive_file.ipixel_parser.output_base64sha256 -# resource "aws_athena_database" "db" { -# name = var.site -# bucket = aws_s3_bucket.athena.id -# } + tags = { + Site = var.site, + Role = "ipixel" + } + + depends_on = [aws_cloudwatch_log_group.ipixel_parser_logs] +} diff --git a/terraform/tracking.tf b/terraform/tracking.tf index b452ea9..f501e22 100644 --- a/terraform/tracking.tf +++ b/terraform/tracking.tf @@ -30,7 +30,7 @@ resource "aws_s3_bucket_object" "ipixel" { content_type = "image/gif" } -resource "aws_s3_bucket" "logs" { +resource "aws_s3_bucket" "ipixel_logs" { bucket = "${var.site}-analytics" tags = { @@ -54,7 +54,7 @@ resource "aws_cloudfront_distribution" "tracking" { logging_config { include_cookies = true - bucket = aws_s3_bucket.logs.bucket_regional_domain_name + bucket = aws_s3_bucket.ipixel_logs.bucket_regional_domain_name prefix = "RAW" } @@ -115,3 +115,4 @@ resource "aws_route53_record" "tracking" { evaluate_target_health = false } } + diff --git a/terraform/versions.tf b/terraform/versions.tf new file mode 100644 index 0000000..9b2c6b7 --- /dev/null +++ b/terraform/versions.tf @@ -0,0 +1,11 @@ +terraform { + required_providers { + archive = { + source = "hashicorp/archive" + } + aws = { + source = "hashicorp/aws" + } + } + required_version = ">= 0.13" +} diff --git a/terraform/www.tf b/terraform/www.tf index 3d90e6a..ba2bdb6 100644 --- a/terraform/www.tf +++ b/terraform/www.tf @@ -130,7 +130,7 @@ resource "aws_route53_record" "www" { data "archive_file" "index_redirect" { type = "zip" - output_path = "${path.module}/files/index_redirect.js.zip" + output_path = ".terraform/tmp/lambda/index_redirect.zip" source_file = "${path.module}/files/index_redirect.js" }