aboutsummaryrefslogtreecommitdiffstats
path: root/scratch/semgrep/semantic-search.el
blob: e1692d060e9fb78ab4e6175ca1a0ba055df2d772 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
;;; semantic-search.el --- Search for semantic similarity of text -*- lexical-binding: t; -*-
;;
;; Copyright (C) 2024 Óscar Nájera
;;
;; Author: Óscar Nájera <hi@oscarnajera.com>
;; Maintainer: Óscar Nájera <hi@oscarnajera.com>
;; Created: February 04, 2024
;; Modified: February 04, 2024
;; Version: 0.1.0
;; Keywords: convenience data docs files hypermedia i18n matching tools
;; Homepage: https://github.com/titan/semantic-search
;; Package-Requires: ((emacs "27.1"))
;;
;; This file is not part of GNU Emacs.
;;
;;; Commentary:
;;
;;  Search for semantic similarity of documents at a paragraph level
;;
;;; Code:


(require 'url)
(require 'org-element)
(require 'org-roam-db)
(require 'dash)

;; Silence byte-compiler.
(defvar url-http-end-of-headers)

(defcustom semantic-search-server-url "http://localhost:8080"
  "Address where the Chromadb server is listening."
  :type 'url
  :group 'semantic-search)

(defun semantic-search--connect (method data)
  "Synchronous query to the server."
  (let ((url-request-method "POST")
        (url-request-extra-headers '(("Content-Type" . "application/json")))
        (url-request-data (encode-coding-string
                           (json-serialize `(,method  ,data))
                           'utf-8)))
    (with-current-buffer
        (url-retrieve-synchronously semantic-search-server-url)
      (goto-char url-http-end-of-headers)
      (json-read))))

(defun semantic-search--org-id (paragraph &optional default)
  (-->
   (org-element-map
       (org-element-property :parent paragraph)
       'node-property
     (lambda (np)
       (cons
        (org-element-property :key np)
        (org-element-property :value np))))
   (assoc 'ID it #'string=)
   (cdr it)
   (org-string-nw-p it)
   (or it default)))

(defun semantic-search--prepare-paragraph (file-id)
  (lambda (paragraph)
    (list
     :document (substring-no-properties (org-element-interpret-data paragraph))
     :metadata (list  :start-point
                      (org-element-property :begin paragraph)
                      :node-id
                      (semantic-search--org-id paragraph file-id)))))

(defun semantic-search--add-buffer ()
  (interactive)
  (if (eq major-mode 'org-mode)
      (-some-->
          (org-element-map
              (org-element-parse-buffer)
              'paragraph
            (semantic-search--prepare-paragraph (org-id-get (point-min) 'create)))
        (cl-coerce it 'vector)
        ;; (json-serialize it)
        ;; (f-write it 'utf-8 "/tmp/out.json")
        ;; (message "%S" it)
        (semantic-search--connect :insert it))
    (user-error "This only works on org-mode")))

(defun semantic-search--roam-data (entries)
  (thread-last
    (cl-mapcar (lambda (meta)
                 (alist-get 'node-id meta))
               entries)
    (delete-dups)
    (vconcat)
    (org-roam-db-query [:select [id title file]
                        :from nodes
                        :where (in id $v1)])))

(defun semantic-search--del-buffer (org-ids)
  (interactive (list (org-id-get)))
  (unless (null org-ids)
    (semantic-search--connect :delete org-ids)))

(defun semantic-search-pick-org-element ()
  (when-let ((context (ignore-errors (org-element-context))))
    (filter-buffer-substring (org-element-property :begin context)
                             (org-element-property :end context))))

(defun semantic-search--sync-db ()
  (org-roam-dolist-with-progress (file (nreverse (org-roam-list-files)))
      "importing to semantic search"
    (org-roam-with-file file nil
      (semantic-search--add-buffer))))

;; (semantic-search--sync-db)
(defun semantic-search (text)
  (interactive (list (or (semantic-search-pick-org-element)
                         (read-from-minibuffer "What are you looking for? "))))
  (-let (((&alist 'distances 'documents 'metadatas)
          (semantic-search--connect :query text)))
    (with-current-buffer (get-buffer-create "*Semantic Search*")
      (erase-buffer)
      (org-mode)
      (insert "#+title: Looking for:\n" text "\n")
      (cl-mapc
       (lambda (entry-distances entry-document entry-metadatas)
         (let ((data (semantic-search--roam-data entry-metadatas)))
           (cl-mapc
            (lambda (d paragraph meta)
              (unless (zerop d)
                (-let* ((node-id (alist-get 'node-id meta))
                        ((_ title file) (assoc node-id data #'string=))
                        (pos
                         (if file
                             (with-temp-buffer
                               (insert-file-contents file)
                               (line-number-at-pos (or (alist-get 'start-point meta) 1)))
                           1)))
                  (insert
                   (format "* [[file:%s::%d][%s]]\n" file pos title)
                   "- Distance :: " (number-to-string d) "\n"
                   paragraph ?\n)
                  (org-fill-paragraph))))
            entry-distances entry-document entry-metadatas)))
       distances documents metadatas)
      (goto-char (point-min))
      (display-buffer (current-buffer)))))

(provide 'semantic-search)
;;; semantic-search.el ends here