aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOscar Najera <hi@oscarnajera.com>2025-05-26 00:19:07 +0200
committerOscar Najera <hi@oscarnajera.com>2025-05-26 00:19:07 +0200
commit7f75595a1ec0c76c06b7804853d0d1f42c4c35c6 (patch)
treee3974e14c0124ff86c50954b56cf79cf09894cf2
parent31ec4b3614a693d9aefe2d1308debeda77932221 (diff)
downloadscratch-7f75595a1ec0c76c06b7804853d0d1f42c4c35c6.tar.gz
scratch-7f75595a1ec0c76c06b7804853d0d1f42c4c35c6.tar.bz2
scratch-7f75595a1ec0c76c06b7804853d0d1f42c4c35c6.zip
just enough to read maxminddb metadata
-rw-r--r--geoip/ip.lisp175
-rw-r--r--geoip/readme.org4
2 files changed, 179 insertions, 0 deletions
diff --git a/geoip/ip.lisp b/geoip/ip.lisp
new file mode 100644
index 0000000..ef18fce
--- /dev/null
+++ b/geoip/ip.lisp
@@ -0,0 +1,175 @@
+;;;; -*- mode: lisp -*-
+
+(ql:quickload '(cffi cffi-libffi split-sequence mmap ironclad babel nibbles fiveam))
+
+(defpackage :geoip
+ (:use :common-lisp :cffi :split-sequence))
+(in-package :geoip)
+
+(defun octet-to-int (seq &key (big-endian t))
+ (reduce (lambda (acc n)
+ (+ (ash acc 8) n))
+ (if big-endian
+ seq (reverse seq))
+ :initial-value 0))
+
+(5am:test t-ints
+ (5am:is (= (octet-to-int '(1 5) :big-endian t) #x105))
+ (5am:is (= (octet-to-int '(1 5) :big-endian nil) #x501)))
+
+(defun parse-ipv4 (ip-address)
+ (reduce (lambda (acc part)
+ (let ((n (parse-integer part)))
+ (assert (<= 0 n 255))
+ (+ (ash acc 8) n)))
+ (split-sequence #\. ip-address)
+ :initial-value 0))
+
+(defun parse-ipv6 (ip-address)
+ (reduce (lambda (acc part)
+ (let ((n (if (string= part "") 0
+ (parse-integer part :radix 16))))
+ (assert (<= 0 n #xFFFF))
+ (+ (ash acc 16) n)))
+ (split-sequence #\: ip-address)
+ :initial-value 0))
+
+
+(defun parse-ip (ip-address)
+ (if (find #\. ip-address)
+ (cons 4 (parse-ipv4 ip-address))
+ (cons 6 (parse-ipv6 ip-address))))
+
+(5am:test ipv4
+ (5am:is (= (parse-ipv4 "8.8.8.8") 134744072))
+ (5am:is (= (parse-ipv6 "::1") 1)))
+
+(defparameter *metadata-marker*
+ (concatenate 'vector
+ #(#xab #xcd #xef)
+ (map 'vector #'char-code "MaxMind.com")))
+
+(defstruct maxmind-database-metadata
+ (node-count 0 :read-only t :type fixnum)
+ (record-size 0 :read-only t :type fixnum)
+ (ip-version 0 :read-only t :type fixnum)
+ (database-type nil :read-only t :type string)
+ (languages nil :read-only t :type vector)
+ (binary-format-major-version 0 :read-only t :type fixnum)
+ (binary-format-minor-version 0 :read-only t :type fixnum)
+ (build-epoch 0 :read-only t :type fixnum)
+ (description nil :read-only t :type list))
+
+(defstruct maxmind-database
+ (ptr nil :read-only t)
+ (filename nil :read-only t)
+ (size 0 :read-only t)
+ (metadata nil)
+ (data-offset -1 :type fixnum))
+
+(defstruct db-reader
+ (db-ptr))
+
+(defun metadata-marker-p (ptr offset)
+ (loop for i below #.(length *metadata-marker*)
+ always (eql (mem-ref ptr :uchar (+ i offset))
+ (svref *metadata-marker* i))))
+
+(defun find-metadata-start (ptr size)
+ (loop for offset from (- size 1 14) downto (- size 1 (* 128 1024))
+ when (metadata-marker-p ptr offset)
+ return (+ offset (length *metadata-marker*))))
+
+(defun mmap->mmdb (ptr file size)
+ (bind ((mmdb (make-maxmind-database :ptr ptr :filename file :size size))
+ (metadata-start (find-metadata-start ptr size)))
+ (setf (maxmind-database-data-offset mmdb) metadata-start)
+ (bind ((metadata (read-metadata mmdb metadata-start))
+ ((:structure maxmind-database-metadata- record-size node-count) metadata)
+ (data-offset (+ 16 (* (/ (* 2 record-size) 8) node-count))))
+ (setf (maxmind-database-metadata mmdb) metadata)
+ (setf (maxmind-database-data-offset mmdb) data-offset))
+ mmdb))
+
+(defun make-mmdb (file)
+ (multiple-value-bind (ptr fd size) (mmap file)
+ (declare (ignore fd))
+ (mmap->mmdb ptr file size)))
+
+(defvar *mmdb*
+ (multiple-value-bind (ptr fd size) (mmap:mmap "GeoLite2-Country.mmdb")
+ (cons ptr size)))
+
+(find-metadata-start (car *mmdb*) (cdr *mmdb*))
+;; metaof 7786987
+
+(defun read-db-char (reader)
+ (with-slots (db-ptr) reader
+ (prog1 (mem-ref db-ptr :uchar)
+ (incf-pointer db-ptr))))
+
+(defun bytes-from-foreign (db-ptr data-length)
+ (let ((bytes (make-array data-length :element-type '(unsigned-byte 8))))
+ (loop for i below data-length
+ do (setf (aref bytes i) (read-db-char db-ptr)))
+ bytes))
+
+(defun mread-raw-unsigned (ptr size &key (big-endian t))
+ (octet-to-int
+ (bytes-from-foreign ptr size)
+ :big-endian big-endian))
+
+(mread-raw-unsigned *reader* 2)
+
+(defun mread-datafield-metadata (db-ptr)
+ (let* ((control-byte (read-db-char db-ptr))
+ (type (ldb (byte 3 5) control-byte))
+ (length (ldb (byte 5 0) control-byte)))
+ (when (= type 0)
+ (setf type (+ 7 (read-db-char db-ptr))))
+ (list type
+ (cond
+ ((or (= type 1) (< length 29))
+ length)
+ ((= length 29)
+ (+ 29 (read-uchar db-ptr)))
+ ((= length 30)
+ (+ 285 (mread-raw-unsigned db-ptr 2)))
+ ((= length 31)
+ (+ 65821 (mread-raw-unsigned db-ptr 3)))))
+ (list type length)))
+
+(defun mread-uft8 (db-ptr length)
+ (babel:octets-to-string
+ (bytes-from-foreign db-ptr length)))
+
+
+(defun mread-map (db-ptr length)
+ (loop repeat length
+ collect
+ (cons
+ (mread-data db-ptr)
+ (mread-data db-ptr))))
+
+(defun mread-list (db-ptr length)
+ (loop repeat length collect (mread-data db-ptr)))
+
+(defun mread-data (db-ptr)
+ (destructuring-bind (type length) (mread-datafield-metadata db-ptr)
+ (ecase type
+ (2 (mread-uft8 db-ptr length))
+ ((5 6 9 10) (mread-raw-unsigned db-ptr length))
+ (7 (mread-map db-ptr length))
+ (11 (mread-list db-ptr length)))))
+
+(mem-ref (car *mmdb*) :uchar 7786987)
+(mem-ref (inc-pointer (car *mmdb*) 7786987) :uchar )
+(defvar *reader*
+ (make-db-reader :db-ptr (inc-pointer (car *mmdb*) 7786987)))
+(setf (db-reader-db-ptr *reader*) (inc-pointer (car *mmdb*) 7786987))
+(mread-datafield-metadata *reader*)
+(mread-data *reader*)
+
+(defvar *refdb*
+ (cl-maxminddb:make-mmdb "GeoLite2-Country.mmdb"))
+
diff --git a/geoip/readme.org b/geoip/readme.org
new file mode 100644
index 0000000..cd6c10e
--- /dev/null
+++ b/geoip/readme.org
@@ -0,0 +1,4 @@
+#+title: Using geoip databases from mindmax from common lisp
+
+* get data from
+https://github.com/P3TERX/GeoLite.mmdb