blob: 92c7ede470136fcd092992aa7809be368158b6c8 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
(ql:quickload 'cl-ppcre)
(ql:quickload 'sqlite)
(ql:quickload 'cl-yesql)
(ql:quickload 'cl-yesql/sqlite)
(ql:quickload "gzip-stream")
(defparameter *log-line* "(.+?) (.+?) (.+?) \\[(.+?)\\] \"(.+?) (.+?) (HTTP/.*)?\" (.+?) (.+?) \"(.+?)\" \"(.+?)\"")
(defparameter +short-month-names+
(alexandria:plist-hash-table
'("Jan" 1 "Feb" 2 "Mar" 3 "Apr" 4 "May" 5 "Jun" 6 "Jul" 7 "Aug" 8 "Sep" 9 "Oct" 10 "Nov" 11 "Dec" 12)
:test #'equal))
(defvar *log-files*
(sort
(remove-if-not
(lambda (str) (ppcre:scan "^stats-access" str))
(uiop:directory-files #P"/scratch/titan/")
:key #'pathname-name)
#'<
:key (lambda (filename)
(sb-posix:stat-mtime
(sb-posix:stat filename)))))
(defun time-to-posix (timestr)
(ppcre:register-groups-bind ((#'parse-integer date) month (#'parse-integer year hour min sec tz))
("(\\d{2})/(\\w{3})/(\\d{4}):(\\d{2}):(\\d{2}):(\\d{2}) ([+-]\\d{4})" timestr)
(-
(encode-universal-time sec min hour date (gethash month +short-month-names+) year (/ tz 100))
#.(encode-universal-time 0 0 0 1 1 1970 0))))
(= (time-to-posix "02/Mar/2025:00:00:16 +0000") 1740870016)
(ppcre:register-groups-bind
(ip remote-log userid (#'time-to-posix date) method path version (#'parse-integer status length) referrer agent)
(*log-line*
"20.171.207.185 - - [02/Mar/2025:00:00:16 +0000] \"GET /pub/hi/scratch/plain/AoC2023/day02/input?h=simpler&id=e2f5b6c2d5bb67013ba1b612252781e4cd9b6fe1 HTTP/1.1\" 200 7017 \"-\" \"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.2; +https://openai.com/gptbot)\"")
(list ip remote-log userid date method path version status length referrer agent))
(defvar *sqlite*
(sqlite:connect "/scratch/titan/web-logs.sqlite"))
(sqlite:disconnect *sqlite*)
(with-open-file (in #P"/scratch/titan/cgit_access.log")
(read-line in)
(ppcre:register-groups-bind
(ip remote-log userid date method path version (#'parse-integer status length) referrer agent)
(*log-line* (read-line in))
(let ((ipid
(or (sqlite:execute-single *sqlite* "select id from ip where addr = ?" ip)
(sqlite:execute-single *sqlite* "insert into ip (addr) values (?) returning id" ip)))
(agentid
(or (sqlite:execute-single *sqlite* "select id from agent where user_agent = ?" agent)
(sqlite:execute-single *sqlite* "insert into agent (user_agent) values (?) returning id" agent))))
;; (sqlite:execute-single *sqlite* "select id from agent where user_agent = ?" agent)
(list
(list ip ipid remote-log userid date (time-to-posix date) method path version status length referrer agent agentid)))))
;; PRAGMA synchronous = 0;
;; PRAGMA journal_mode=WAL;
;; PRAGMA cache_size = 1000000;
;; PRAGMA locking_mode = EXCLUSIVE;
;; PRAGMA temp_store = MEMORY;
(yesql:import log-queries
:from "/home/titan/dev/scratch/webstats/logs.sql"
:as :cl-yesql/sqlite
:binding :all-functions)
(create-log-table *sqlite*)
(create-agent-table *sqlite*)
(create-ip-table *sqlite*)
(defun insert-line (db line)
(ppcre:register-groups-bind
(ip remote-log userid (#'time-to-posix date) method path version (#'parse-integer status length) referrer agent)
(*log-line* line)
(insert-log db ip remote-log userid date method path version status length referrer agent)))
(defun insert-from (stream)
(loop for line = (read-line stream nil nil)
while line
do (insert-line *sqlite* line)))
(time
(sqlite:with-transaction *sqlite*
(with-open-file (in #P"/scratch/titan/stats-access.log.1")
(insert-from in))))
(loop for f in *log-files*
when (ppcre:scan "gz$" (pathname-type f))
do
(print f)
(time
(sqlite:with-transaction *sqlite*
(gzip-stream:with-open-gzip-file (in f)
(insert-from in)))))
(sqlite:execute-non-query *sqlite* "insert into agent (user_agent) select distinct user_agent from logs")
(sqlite:execute-non-query *sqlite* "alter table logs add column agentid integer")
(sqlite:execute-non-query *sqlite* "update logs set agentid = (select id from agent where logs.user_agent = agent.user_agent)")
(sqlite:execute-non-query *sqlite* "alter table logs drop column user_agent")
(sqlite:execute-non-query *sqlite* "insert into ip (addr) select distinct ip from logs")
(sqlite:execute-non-query *sqlite* "alter table ip add column country text")
(sqlite:execute-non-query *sqlite* "alter table ip add column city text")
(sqlite:execute-non-query *sqlite* "alter table logs add column ipid integer")
(sqlite:execute-non-query *sqlite* "update logs set ipid = (select id from ip where logs.ip = ip.addr)")
(sqlite:execute-non-query *sqlite* "alter table logs drop column ip")
(time
(sqlite:execute-to-list *sqlite* "select * from ip where addr = '18.227.13.136'"))
(sqlite:execute-single *sqlite* "select count(*) from ip")
(ql:quickload 'arrows)
(sqlite:with-transaction *sqlite*
(let ((countries
(geoip:with-mmdb (db "GeoLite2-City.mmdb")
(arrows:->>
(sqlite:execute-to-list *sqlite* "select id, addr from ip")
(mapcar (lambda (row)
(destructuring-bind (id ip) row
(let ((res (geoip:query-ip db ip)))
(list
id
(geoip:get-in res :city :names :en)
(geoip:get-in res :country :iso-code))))))))))
(loop for (id city country) in countries
do (update-ip-location *sqlite* city country id))))
|