diff options
Diffstat (limited to 'webstats')
-rw-r--r-- | webstats/logparse.lisp | 121 | ||||
-rw-r--r-- | webstats/logs.sql | 27 |
2 files changed, 148 insertions, 0 deletions
diff --git a/webstats/logparse.lisp b/webstats/logparse.lisp new file mode 100644 index 0000000..71864d8 --- /dev/null +++ b/webstats/logparse.lisp @@ -0,0 +1,121 @@ +(ql:quickload 'cl-ppcre) +(ql:quickload 'sqlite) +(ql:quickload 'cl-yesql) +(ql:quickload 'cl-yesql/sqlite) +(ql:quickload "gzip-stream") + +(defparameter *log-line* "(.+?) (.+?) (.+?) \\[(.+?)\\] \"(.+?) (.+?) (HTTP/.*)?\" (.+?) (.+?) \"(.+?)\" \"(.+?)\"") + +(defparameter +short-month-names+ + (alexandria:plist-hash-table + '("Jan" 1 "Feb" 2 "Mar" 3 "Apr" 4 "May" 5 "Jun" 6 "Jul" 7 "Aug" 8 "Sep" 9 "Oct" 10 "Nov" 11 "Dec" 12) + :test #'equal)) + +(defvar *log-files* + (sort + (remove-if-not + (lambda (str) (ppcre:scan "^cgit_access" str)) + (uiop:directory-files #P"/scratch/titan/apache2/") + :key #'pathname-name) + #'< + :key (lambda (filename) + (sb-posix:stat-mtime + (sb-posix:stat filename))))) + +(defun time-to-posix (timestr) + (ppcre:register-groups-bind ((#'parse-integer date) month (#'parse-integer year hour min sec)) + ("(\\d{2})/(\\w{3})/(\\d{4}):(\\d{2}):(\\d{2}):(\\d{2})" timestr) + (- + (encode-universal-time sec min hour date (gethash month +short-month-names+) year) + #.(encode-universal-time 0 0 0 1 1 1970 0)))) + +(= (time-to-posix "02/Mar/2025:00:00:16 +0000") 1740870016) + +(ppcre:register-groups-bind + (ip remote-log userid (#'time-to-posix date) method path version (#'parse-integer status length) referrer agent) + (*log-line* + "20.171.207.185 - - [02/Mar/2025:00:00:16 +0000] \"GET /pub/hi/scratch/plain/AoC2023/day02/input?h=simpler&id=e2f5b6c2d5bb67013ba1b612252781e4cd9b6fe1 HTTP/1.1\" 200 7017 \"-\" \"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)\"") + (list ip remote-log userid date method path version status length referrer agent)) + + +(defvar *sqlite* + (sqlite:connect "/scratch/titan/apache2/cgit-logs.sqlite")) +(sqlite:disconnect *sqlite*) + +(with-open-file (in #P"/scratch/titan/cgit_access.log") + (read-line in) + (ppcre:register-groups-bind + (ip remote-log userid date method path version (#'parse-integer status length) referrer agent) + (*log-line* (read-line in)) + + can this be simplified? + (let ((ipid + (or (sqlite:execute-single *sqlite* "select id from ip where addr = ?" ip) + (sqlite:execute-single *sqlite* "insert into ip (addr) values (?) returning id" ip))) + (agentid + (or (sqlite:execute-single *sqlite* "select id from agent where user_agent = ?" agent) + (sqlite:execute-single *sqlite* "insert into agent (user_agent) values (?) returning id" agent)))) + + + ;; (sqlite:execute-single *sqlite* "select id from agent where user_agent = ?" agent) + (list + (list ip ipid remote-log userid date (time-to-posix date) method path version status length referrer agent agentid))))) + +;; PRAGMA synchronous = 0; +;; PRAGMA journal_mode=WAL; +;; PRAGMA cache_size = 1000000; +;; PRAGMA locking_mode = EXCLUSIVE; +;; PRAGMA temp_store = MEMORY; + +(yesql:import log-queries + :from "/home/titan/dev/scratch/webstats/logs.sql" + :as :cl-yesql/sqlite + :binding :all-functions) + +(create-log-table *sqlite*) +(create-agent-table *sqlite*) +(create-ip-table *sqlite*) + +(defun insert-line (db line) + (ppcre:register-groups-bind + (ip remote-log userid (#'time-to-posix date) method path version (#'parse-integer status length) referrer agent) + (*log-line* line) + (insert-log db ip remote-log userid date method path version status length referrer agent))) + +(defun insert-from (stream) + (loop for line = (read-line stream nil nil) + while line + do (insert-line *sqlite* line))) + +(time + (sqlite:with-transaction *sqlite* + (with-open-file (in #P"/scratch/titan/apache2/cgit_access.log") + (insert-from in)))) + + +(loop for f in *log-files* + when (ppcre:scan "gz$" (pathname-type f)) + do + (print f) + (time + (sqlite:with-transaction *sqlite* + (gzip-stream:with-open-gzip-file (in f) + (insert-from in))))) + + +(sqlite:execute-non-query *sqlite* "insert into agent (user_agent) select distinct user_agent from logs") +(sqlite:execute-non-query *sqlite* "alter table logs add column agentid integer") +(sqlite:execute-non-query *sqlite* "update logs set agentid = (select id from agent where logs.user_agent = agent.user_agent)") +(sqlite:execute-non-query *sqlite* "alter table logs drop column user_agent") + +(sqlite:execute-non-query *sqlite* "insert into ip (addr) select distinct ip from logs") +(sqlite:execute-non-query *sqlite* "alter table logs add column ipid integer") +(sqlite:execute-non-query *sqlite* "update logs set ipid = (select id from ip where logs.ip = ip.addr)") +(sqlite:execute-non-query *sqlite* "alter table logs drop column ip") + +(time + (sqlite:execute-to-list *sqlite* "select * from ip where addr = '18.227.13.136'")) + + + +(sqlite:execute-single *sqlite* "select count(*) from ip") diff --git a/webstats/logs.sql b/webstats/logs.sql new file mode 100644 index 0000000..9b5146b --- /dev/null +++ b/webstats/logs.sql @@ -0,0 +1,27 @@ +-- name: create-log-table +CREATE TABLE IF NOT EXISTS logs ( + ip text, + remote_log text, + userid text, + date integer, + request_method text, + path text, + version text, + status integer, + length integer, + referrer text, + user_agent text) + +-- name: create-agent-table +CREATE TABLE IF NOT EXISTS agent ( + id integer PRIMARY KEY, + user_agent text UNIQUE) + +-- name: create-ip-table +CREATE TABLE IF NOT EXISTS ip ( + id integer PRIMARY KEY, + addr text UNIQUE) + +-- name: insert-log +INSERT INTO logs (ip, remote_log, userid, date, request_method, path, version, status, length, referrer, user_agent) + VALUES (?ip, ?remote_log, ?userid, ?date, ?request_method, ?path, ?version, ?status, ?length, ?referrer, ?user_agent) |