aboutsummaryrefslogtreecommitdiffstats
path: root/scratch/semgrep/semantic-search.el
diff options
context:
space:
mode:
Diffstat (limited to 'scratch/semgrep/semantic-search.el')
-rw-r--r--scratch/semgrep/semantic-search.el148
1 files changed, 148 insertions, 0 deletions
diff --git a/scratch/semgrep/semantic-search.el b/scratch/semgrep/semantic-search.el
new file mode 100644
index 0000000..e1692d0
--- /dev/null
+++ b/scratch/semgrep/semantic-search.el
@@ -0,0 +1,148 @@
+;;; semantic-search.el --- Search for semantic similarity of text -*- lexical-binding: t; -*-
+;;
+;; Copyright (C) 2024 Óscar Nájera
+;;
+;; Author: Óscar Nájera <hi@oscarnajera.com>
+;; Maintainer: Óscar Nájera <hi@oscarnajera.com>
+;; Created: February 04, 2024
+;; Modified: February 04, 2024
+;; Version: 0.1.0
+;; Keywords: convenience data docs files hypermedia i18n matching tools
+;; Homepage: https://github.com/titan/semantic-search
+;; Package-Requires: ((emacs "27.1"))
+;;
+;; This file is not part of GNU Emacs.
+;;
+;;; Commentary:
+;;
+;; Search for semantic similarity of documents at a paragraph level
+;;
+;;; Code:
+
+
+(require 'url)
+(require 'org-element)
+(require 'org-roam-db)
+(require 'dash)
+
+;; Silence byte-compiler.
+(defvar url-http-end-of-headers)
+
+(defcustom semantic-search-server-url "http://localhost:8080"
+ "Address where the Chromadb server is listening."
+ :type 'url
+ :group 'semantic-search)
+
+(defun semantic-search--connect (method data)
+ "Synchronous query to the server."
+ (let ((url-request-method "POST")
+ (url-request-extra-headers '(("Content-Type" . "application/json")))
+ (url-request-data (encode-coding-string
+ (json-serialize `(,method ,data))
+ 'utf-8)))
+ (with-current-buffer
+ (url-retrieve-synchronously semantic-search-server-url)
+ (goto-char url-http-end-of-headers)
+ (json-read))))
+
+(defun semantic-search--org-id (paragraph &optional default)
+ (-->
+ (org-element-map
+ (org-element-property :parent paragraph)
+ 'node-property
+ (lambda (np)
+ (cons
+ (org-element-property :key np)
+ (org-element-property :value np))))
+ (assoc 'ID it #'string=)
+ (cdr it)
+ (org-string-nw-p it)
+ (or it default)))
+
+(defun semantic-search--prepare-paragraph (file-id)
+ (lambda (paragraph)
+ (list
+ :document (substring-no-properties (org-element-interpret-data paragraph))
+ :metadata (list :start-point
+ (org-element-property :begin paragraph)
+ :node-id
+ (semantic-search--org-id paragraph file-id)))))
+
+(defun semantic-search--add-buffer ()
+ (interactive)
+ (if (eq major-mode 'org-mode)
+ (-some-->
+ (org-element-map
+ (org-element-parse-buffer)
+ 'paragraph
+ (semantic-search--prepare-paragraph (org-id-get (point-min) 'create)))
+ (cl-coerce it 'vector)
+ ;; (json-serialize it)
+ ;; (f-write it 'utf-8 "/tmp/out.json")
+ ;; (message "%S" it)
+ (semantic-search--connect :insert it))
+ (user-error "This only works on org-mode")))
+
+(defun semantic-search--roam-data (entries)
+ (thread-last
+ (cl-mapcar (lambda (meta)
+ (alist-get 'node-id meta))
+ entries)
+ (delete-dups)
+ (vconcat)
+ (org-roam-db-query [:select [id title file]
+ :from nodes
+ :where (in id $v1)])))
+
+(defun semantic-search--del-buffer (org-ids)
+ (interactive (list (org-id-get)))
+ (unless (null org-ids)
+ (semantic-search--connect :delete org-ids)))
+
+(defun semantic-search-pick-org-element ()
+ (when-let ((context (ignore-errors (org-element-context))))
+ (filter-buffer-substring (org-element-property :begin context)
+ (org-element-property :end context))))
+
+(defun semantic-search--sync-db ()
+ (org-roam-dolist-with-progress (file (nreverse (org-roam-list-files)))
+ "importing to semantic search"
+ (org-roam-with-file file nil
+ (semantic-search--add-buffer))))
+
+;; (semantic-search--sync-db)
+(defun semantic-search (text)
+ (interactive (list (or (semantic-search-pick-org-element)
+ (read-from-minibuffer "What are you looking for? "))))
+ (-let (((&alist 'distances 'documents 'metadatas)
+ (semantic-search--connect :query text)))
+ (with-current-buffer (get-buffer-create "*Semantic Search*")
+ (erase-buffer)
+ (org-mode)
+ (insert "#+title: Looking for:\n" text "\n")
+ (cl-mapc
+ (lambda (entry-distances entry-document entry-metadatas)
+ (let ((data (semantic-search--roam-data entry-metadatas)))
+ (cl-mapc
+ (lambda (d paragraph meta)
+ (unless (zerop d)
+ (-let* ((node-id (alist-get 'node-id meta))
+ ((_ title file) (assoc node-id data #'string=))
+ (pos
+ (if file
+ (with-temp-buffer
+ (insert-file-contents file)
+ (line-number-at-pos (or (alist-get 'start-point meta) 1)))
+ 1)))
+ (insert
+ (format "* [[file:%s::%d][%s]]\n" file pos title)
+ "- Distance :: " (number-to-string d) "\n"
+ paragraph ?\n)
+ (org-fill-paragraph))))
+ entry-distances entry-document entry-metadatas)))
+ distances documents metadatas)
+ (goto-char (point-min))
+ (display-buffer (current-buffer)))))
+
+(provide 'semantic-search)
+;;; semantic-search.el ends here