# Patch for the analytics loader (analytics/index.js) and its ad-hoc queries
# (analytics/queries.sql). Text before the first "diff --git" header is ignored
# by patch tools.
#
# analytics/index.js
#   - Runs `PRAGMA busy_timeout = 6000` on the database before the REPLACE
#     statement is prepared.
#   - Stores the `viewed` value under the record key `scrolled`.
#   - Binds `undefined`, `null` and `''` as NULL; every other value (including
#     0 and false) is passed through unchanged.
#   - Retries `stmt.run(params)` for as long as it fails with `err.code ===
#     'SQLITE_BUSY'`; any other error is rethrown.
#   - Counts written records and prints one '.' per 10 records instead of one
#     per record.
#   - Finalizes the prepared statement before closing the database.
#
# analytics/queries.sql
#   - Adds a query counting distinct IFNULL(tid, ip) values per referrer_host.
#   - Adds a query counting rows per referrer where referrer_host matches
#     '%reddit.com'.
#
# NOTE(review): indentation and whitespace-only lines in the hunks below were
#   reconstructed to match the hunk header counts - confirm against the target
#   files before applying (or apply with whitespace tolerance).
# NOTE(review): the `index` lines carry the original blob ids and may not match
#   the post-image of this patch.
# NOTE(review): the context line for `client_end` tests `sessionEnd` but formats
#   `sessionStart` - looks like a copy/paste slip; confirm outside this patch.
# NOTE(review): the SQLITE_BUSY retry loop has no attempt limit - confirm that
#   waiting indefinitely on a locked database is intended.
diff --git a/analytics/index.js b/analytics/index.js
index ef41ff0..e677272 100644
--- a/analytics/index.js
+++ b/analytics/index.js
@@ -98,6 +98,8 @@ async function* loadFiles () {
     );
   `);
 
+  await db.run('PRAGMA busy_timeout = 6000');
+
   const stmt = await db.prepare(sql`
     REPLACE INTO records VALUES (
       :dts,
@@ -122,6 +124,8 @@ async function* loadFiles () {
     );
   `);
 
+  let counter = 0;
+
   await pipeline(
     Readable.from(loadFiles()),
     parser,
@@ -180,7 +184,7 @@ async function* loadFiles () {
           client_end: sessionEnd ? format(new Date(sessionStart), 'yyyy-MM-dd HH:mm:ss') : null,
           duration,
           language,
-          viewed,
+          scrolled: viewed,
           max_scroll,
           page_height,
           viewport_height,
@@ -202,15 +206,25 @@ async function* loadFiles () {
       write (record, encoding, done) {
         (async () => {
           const params = Object.fromEntries(
-            Object.entries(record).map(([ k, v ]) => [ ':' + k, v ]),
+            // Bind empty or missing values as NULL, but keep other falsy values such as 0 or false.
+            Object.entries(record).map(([ k, v ]) => [ ':' + k, (v === '' || v == null) ? null : v ]),
           );
-          await stmt.run(params);
-          process.stdout.write('.');
+          while (true) {
+            try {
+              await stmt.run(params);
+              break;
+            } catch (err) {
+              if (err.code !== 'SQLITE_BUSY') throw err;
+            }
+          }
+          counter++;
+          if (!(counter % 10)) process.stdout.write('.');
         })().then(() => done(), done);
       },
     }),
   );
 
+  await stmt.finalize();
   await db.close();
 
 })().then(
diff --git a/analytics/queries.sql b/analytics/queries.sql
index 7ed0ecb..c074ea4 100644
--- a/analytics/queries.sql
+++ b/analytics/queries.sql
@@ -11,3 +11,12 @@ FROM records
 WHERE duration > 1 AND duration < (60 * 30)
 GROUP BY duration / 60
 HAVING total > 5;
+
+SELECT referrer_host, count(DISTINCT IFNULL(tid, ip)) as tids, referrer
+FROM records
+GROUP BY referrer_host;
+
+SELECT COUNT(IFNULL(tid,ip)) as total, referrer
+FROM records
+WHERE referrer_host LIKE '%reddit.com'
+GROUP BY referrer;