aboutsummaryrefslogtreecommitdiffstats
path: root/geoip/ip.lisp
blob: dfdabc0fd665f38fe01cbb3c3e5e3b56b8187b49 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
;;;; -*- mode: lisp -*-

(ql:quickload '(cffi cffi-libffi split-sequence mmap ironclad babel nibbles fiveam local-time))

(defpackage :geoip
  (:use :common-lisp :cffi :split-sequence))
(in-package :geoip)

(defun octet-to-int (seq &key (big-endian t))
  (reduce (lambda (acc n)
            (+ (ash acc 8) n))
          (if big-endian
              seq (reverse seq))
          :initial-value 0))

(5am:test t-ints
  (5am:is (= (octet-to-int '(1 5) :big-endian t) #x105))
  (5am:is (= (octet-to-int '(1 5) :big-endian nil) #x501)))

(defun parse-ipv4 (ip-address)
  (reduce (lambda (acc part)
            (let ((n (parse-integer part)))
              (assert (<= 0 n 255))
              (+ (ash acc 8) n)))
          (split-sequence #\. ip-address)
          :initial-value 0))

(defun parse-ipv6 (ip-address)
  (reduce (lambda (acc part)
            (let ((n (if (string= part "") 0
                         (parse-integer part :radix 16))))
              (assert (<= 0 n #xFFFF))
              (+ (ash acc 16) n)))
          (split-sequence #\: ip-address)
          :initial-value 0))


(defun parse-ip (ip-address)
  (if (find #\. ip-address)
      (cons 4 (parse-ipv4 ip-address))
      (cons 6 (parse-ipv6 ip-address))))

(5am:test ipv4
  (5am:is (= (parse-ipv4 "8.8.8.8") 134744072))
  (5am:is (= (parse-ipv6 "::1") 1)))

(defparameter *metadata-marker*
  (concatenate 'vector
               #(#xab #xcd #xef)
               (map 'vector #'char-code "MaxMind.com")))


(defstruct maxmind-database
  (filename nil :read-only t)
  (ptr nil :read-only t)
  (fd nil :read-only t)
  (size 0 :read-only t)
  (node-count 0 :read-only t :type fixnum)
  (record-size 0 :read-only t :type fixnum)
  (ip-version 0 :read-only t :type fixnum)
  (data-offset -1 :type fixnum)
  (metadata nil))

(defstruct db-reader
  (db-ptr)
  (head-ptr)
  (data-offset))

(defstruct maxmind-database-metadata
  (node-count 0 :read-only t :type fixnum)
  (record-size 0 :read-only t :type fixnum)
  (ip-version 0 :read-only t :type fixnum)
  (database-type nil :read-only t :type string)
  (languages nil :read-only t :type list)
  (binary-format-major-version 0 :read-only t :type fixnum)
  (binary-format-minor-version 0 :read-only t :type fixnum)
  (build-epoch 0 :read-only t :type fixnum)
  (description nil :read-only t :type list))


(defun get-val (alist key &key (test #'equal))
  (cdr (assoc key alist :test test)))

(defun make-metadata (data)
  (make-maxmind-database-metadata
   :binary-format-major-version (get-val data :binary-format-major-version)
   :binary-format-minor-version (get-val data :binary-format-minor-version)
   :build-epoch (get-val data :build-epoch)
   :database-type (get-val data :database-type)
   :description (get-val data :description)
   :ip-version (get-val data :ip-version)
   :languages (get-val data :languages)
   :node-count (get-val data :node-count)
   :record-size (get-val data :record-size)))

(defun metadata-marker-p (ptr offset)
  (loop for i below #.(length *metadata-marker*)
        always (eql (mem-ref ptr :uchar (+ i offset))
                    (svref *metadata-marker* i))))

(defun find-metadata-start (ptr size)
  (loop for offset from (- size 1 #.(length *metadata-marker*))
          downto (- size 1 (* 128 1024))
        when (metadata-marker-p ptr offset)
          return (+ offset #.(length *metadata-marker*))))

(defun read-db-char (reader)
  (with-slots (db-ptr) reader
    (prog1 (mem-ref db-ptr :uchar)
      (incf-pointer db-ptr))))

(defun bytes-from-foreign (reader data-length)
  (let ((bytes (make-array data-length :element-type '(unsigned-byte 8))))
    (with-slots (db-ptr) reader
      (foreign-array-to-lisp db-ptr
                             `(:array :uchar ,data-length)
                             :element-type '(unsigned-byte 8)
                             :displaced-to bytes)
      (incf-pointer db-ptr data-length))
    bytes))

(defun mread-uft8 (reader length)
  (with-slots (db-ptr) reader
    (prog1 (foreign-string-to-lisp db-ptr :count length :encoding :utf-8)
      (incf-pointer db-ptr length))))

(defmacro mem-uint (ptr size)
  `(let ((r 0))
     (dotimes (i ,size r)
       (setf r (+ (ash r 8) (mem-ref ,ptr :uint8 i))))))

(defun mread-unsigned (reader size)
  (with-slots (db-ptr) reader
    (prog1 (mem-uint db-ptr size)
      (incf-pointer db-ptr size))))

(defun mread-int32 (db-ptr length)
  (let ((uval (mread-unsigned db-ptr length)))
    (if (and (= length 4) (logbitp 31 uval))
        (- uval #.(expt 2 32))
        uval)))

(5am:test reader
  (with-foreign-array (a #(0 0 1 23 126 195 159 195 156) '(:array :uint8 9))
    (let ((r (make-db-reader :db-ptr a)))
      (5am:is (= 279 (mread-unsigned r 4)))
      (5am:is (equal "~ßÜ" (mread-uft8 r 5))))))

(5am:run-all-tests)


(defun mread-datafield-metadata (db-ptr)
  (let* ((control-byte (read-db-char db-ptr))
         (type (ldb (byte 3 5) control-byte))
         (length (ldb (byte 5 0) control-byte)))
    (when (zerop type)
      (setf type (+ 7 (read-db-char db-ptr))))
    (when (= type 1)
      (incf-pointer (db-reader-db-ptr db-ptr) -1))
    (list type
          (cond
            ((or (= type 1) (< length 29))
             length)
            ((= length 29)
             (+ 29 (read-db-char db-ptr)))
            ((= length 30)
             (+ 285 (mread-unsigned db-ptr 2)))
            ((= length 31)
             (+ 65821 (mread-unsigned db-ptr 3)))))))


(defun mread-map (db-ptr length)
  (loop repeat length
        collect
        (cons
         (intern (string-upcase (substitute #\- #\_ (mread-data db-ptr))) :keyword)
         (mread-data db-ptr))))

(defun mread-list (db-ptr length)
  (loop repeat length collect (mread-data db-ptr)))

(defun read-pointer (reader)
  (let* ((control-byte (read-db-char reader))
         (size-bits (ldb (byte 2 3) control-byte))
         (value-bits (ldb (byte 3 0) control-byte)))
    (ecase size-bits
      (0 (+ (ash value-bits 8) (read-db-char reader)))
      (1 (+ (ash value-bits 16) (mread-unsigned reader 2) 2048))
      (2 (+ (ash value-bits 24) (mread-unsigned reader 3) 526336))
      (3 (mread-unsigned reader 4)))))

(defun mread-pointer (reader)
  (with-slots (db-ptr head-ptr data-offset) reader
    (let* ((target (+ (read-pointer reader) data-offset))
           (curr-ptr db-ptr)
           (result))
      (setf db-ptr (inc-pointer head-ptr target))
      (setf result (mread-data reader))
      (setf db-ptr curr-ptr)
      result)))

(defun mread-data (db-ptr)
  (destructuring-bind (type length) (mread-datafield-metadata db-ptr)
    (ecase type
      (1 (mread-pointer db-ptr))
      (2 (mread-uft8 db-ptr length))
      (3 (cons 'double (mread-unsigned db-ptr 8)))
      (4 (bytes-from-foreign db-ptr length))
      ((5 6 9 10) (mread-unsigned db-ptr length))
      (7 (mread-map db-ptr length))
      (8 (mread-int32 db-ptr length))
      (11 (mread-list db-ptr length))
      (14 (< 0 length)) ; bool
      )))

(defun read-node-record (mmdb node-number bit)
  "Read left (bit=0) or right (bit=1) record from node"
  (with-slots (ptr record-size) mmdb
    (let* ((node-bytes (/ record-size 4))
           (record-bytes (floor record-size 8))
           (node-offset (* node-bytes node-number)))
      (if (evenp node-bytes)
          ;; Simple case: records align to byte boundaries
          (mem-uint (inc-pointer ptr (+ node-offset (* bit record-bytes))) record-bytes)
          ;; Complex case: records share a byte
          (let* ((base-value (mem-uint (inc-pointer ptr (+ node-offset (* bit (1+ record-bytes)))) record-bytes))
                 (shared-byte (mem-ref (inc-pointer ptr (+ node-offset record-bytes)) :uchar))
                 (high-bits (if (zerop bit) (ash shared-byte -4) (logand shared-byte #xf))))
            (+ (ash high-bits (* record-bytes 8)) base-value))))))

(defun record-value (mmdb ip-bits)
  (with-slots (node-count) mmdb
    (loop with node = 0
          for bit in ip-bits
          for next-node = (read-node-record mmdb node bit)
          do (setf node next-node)
          when (= node node-count)
            return nil
          when (> node node-count)
            return node)))

(defun lookup-ip (mmdb ip-bits)
  (alexandria:when-let ((record (record-value mmdb ip-bits)))
    (with-slots (ptr data-offset node-count) mmdb
      (mread-data
       (make-db-reader
        :db-ptr (inc-pointer ptr (+ data-offset (- record node-count 16)))
        :head-ptr ptr
        :data-offset data-offset)))))

(defun integer-to-bits (n bit-count)
  "Convert integer to list of bits (MSB first)"
  (loop for i from (1- bit-count) downto 0
        collect (if (logbitp i n) 1 0)))

(defun mmap->mmdb (file ptr fd size)
  (let* ((metadata-start (find-metadata-start ptr size))
         (db-reader (make-db-reader :db-ptr (inc-pointer ptr metadata-start)))
         (metadata (make-metadata (mread-data db-reader))))
    (with-slots (record-size node-count ip-version) metadata
      (make-maxmind-database
       :filename file
       :ptr ptr :fd fd :size size
       :metadata metadata
       :node-count node-count
       :record-size record-size
       :ip-version ip-version
       ;; :data-offset (+ 16 (* (/ (* 2 record-size) 8) node-count))
       :data-offset (+ 16 (/ (* record-size node-count) 4))))))

(defun make-mmdb (file)
  (multiple-value-bind (ptr fd size) (mmap:mmap file)
    (mmap->mmdb file ptr fd size)))

(defun close-mmdb (mmdb)
  (with-slots (ptr fd size) mmdb
    (mmap:munmap ptr fd size)))
(defvar *mmdb* (make-mmdb  "GeoLite2-Country.mmdb"))

(with-slots (metadata) *mmdb*
  (with-slots (build-epoch) metadata
    (local-time:unix-to-timestamp
     build-epoch)))