;;; semantic-search.el --- Search for semantic similarity of text -*- lexical-binding: t; -*- ;; ;; Copyright (C) 2024 Óscar Nájera ;; ;; Author: Óscar Nájera ;; Maintainer: Óscar Nájera ;; Created: February 04, 2024 ;; Modified: February 04, 2024 ;; Version: 0.1.0 ;; Keywords: convenience data docs files hypermedia i18n matching tools ;; Homepage: https://github.com/titan/semantic-search ;; Package-Requires: ((emacs "27.1")) ;; ;; This file is not part of GNU Emacs. ;; ;;; Commentary: ;; ;; Search for semantic similarity of documents at a paragraph level ;; ;;; Code: (require 'url) (require 'org-element) (require 'org-roam-db) (require 'dash) ;; Silence byte-compiler. (defvar url-http-end-of-headers) (defcustom semantic-search-server-url "http://localhost:8080" "Address where the Chromadb server is listening." :type 'url :group 'semantic-search) (defun semantic-search--connect (method data) "Synchronous query to the server." (let ((url-request-method "POST") (url-request-extra-headers '(("Content-Type" . "application/json"))) (url-request-data (encode-coding-string (json-serialize `(,method ,data)) 'utf-8))) (with-current-buffer (url-retrieve-synchronously semantic-search-server-url) (goto-char url-http-end-of-headers) (json-read)))) (defun semantic-search--org-id (paragraph &optional default) (--> (org-element-map (org-element-property :parent paragraph) 'node-property (lambda (np) (cons (org-element-property :key np) (org-element-property :value np)))) (assoc 'ID it #'string=) (cdr it) (org-string-nw-p it) (or it default))) (defun semantic-search--prepare-paragraph (file-id) (lambda (paragraph) (list :document (substring-no-properties (org-element-interpret-data paragraph)) :metadata (list :start-point (org-element-property :begin paragraph) :node-id (semantic-search--org-id paragraph file-id))))) (defun semantic-search--add-buffer () (interactive) (if (eq major-mode 'org-mode) (-some--> (org-element-map (org-element-parse-buffer) 'paragraph (semantic-search--prepare-paragraph (org-id-get (point-min) 'create))) (cl-coerce it 'vector) ;; (json-serialize it) ;; (f-write it 'utf-8 "/tmp/out.json") ;; (message "%S" it) (semantic-search--connect :insert it)) (user-error "This only works on org-mode"))) (defun semantic-search--roam-data (entries) (thread-last (cl-mapcar (lambda (meta) (alist-get 'node-id meta)) entries) (delete-dups) (vconcat) (org-roam-db-query [:select [id title file] :from nodes :where (in id $v1)]))) (defun semantic-search--del-buffer (org-ids) (interactive (list (org-id-get))) (unless (null org-ids) (semantic-search--connect :delete org-ids))) (defun semantic-search-pick-org-element () (when-let ((context (ignore-errors (org-element-context)))) (filter-buffer-substring (org-element-property :begin context) (org-element-property :end context)))) (defun semantic-search--sync-db () (org-roam-dolist-with-progress (file (nreverse (org-roam-list-files))) "importing to semantic search" (org-roam-with-file file nil (semantic-search--add-buffer)))) ;; (semantic-search--sync-db) (defun semantic-search (text) (interactive (list (or (semantic-search-pick-org-element) (read-from-minibuffer "What are you looking for? ")))) (-let (((&alist 'distances 'documents 'metadatas) (semantic-search--connect :query text))) (with-current-buffer (get-buffer-create "*Semantic Search*") (erase-buffer) (org-mode) (insert "#+title: Looking for:\n" text "\n") (cl-mapc (lambda (entry-distances entry-document entry-metadatas) (let ((data (semantic-search--roam-data entry-metadatas))) (cl-mapc (lambda (d paragraph meta) (unless (zerop d) (-let* ((node-id (alist-get 'node-id meta)) ((_ title file) (assoc node-id data #'string=)) (pos (if file (with-temp-buffer (insert-file-contents file) (line-number-at-pos (or (alist-get 'start-point meta) 1))) 1))) (insert (format "* [[file:%s::%d][%s]]\n" file pos title) "- Distance :: " (number-to-string d) "\n" paragraph ?\n) (org-fill-paragraph)))) entry-distances entry-document entry-metadatas))) distances documents metadatas) (goto-char (point-min)) (display-buffer (current-buffer))))) (provide 'semantic-search) ;;; semantic-search.el ends here