From d95bddf2af14e8ac5cc147903435a005039c173e Mon Sep 17 00:00:00 2001 From: Oscar Najera Date: Sun, 4 Feb 2024 21:45:42 +0100 Subject: Rename semgrep to semantic-search --- scratch/semgrep/semantic-search.el | 148 +++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 scratch/semgrep/semantic-search.el (limited to 'scratch/semgrep/semantic-search.el') diff --git a/scratch/semgrep/semantic-search.el b/scratch/semgrep/semantic-search.el new file mode 100644 index 0000000..e1692d0 --- /dev/null +++ b/scratch/semgrep/semantic-search.el @@ -0,0 +1,148 @@ +;;; semantic-search.el --- Search for semantic similarity of text -*- lexical-binding: t; -*- +;; +;; Copyright (C) 2024 Óscar Nájera +;; +;; Author: Óscar Nájera +;; Maintainer: Óscar Nájera +;; Created: February 04, 2024 +;; Modified: February 04, 2024 +;; Version: 0.1.0 +;; Keywords: convenience data docs files hypermedia i18n matching tools +;; Homepage: https://github.com/titan/semantic-search +;; Package-Requires: ((emacs "27.1")) +;; +;; This file is not part of GNU Emacs. +;; +;;; Commentary: +;; +;; Search for semantic similarity of documents at a paragraph level +;; +;;; Code: + + +(require 'url) +(require 'org-element) +(require 'org-roam-db) +(require 'dash) + +;; Silence byte-compiler. +(defvar url-http-end-of-headers) + +(defcustom semantic-search-server-url "http://localhost:8080" + "Address where the Chromadb server is listening." + :type 'url + :group 'semantic-search) + +(defun semantic-search--connect (method data) + "Synchronous query to the server." + (let ((url-request-method "POST") + (url-request-extra-headers '(("Content-Type" . "application/json"))) + (url-request-data (encode-coding-string + (json-serialize `(,method ,data)) + 'utf-8))) + (with-current-buffer + (url-retrieve-synchronously semantic-search-server-url) + (goto-char url-http-end-of-headers) + (json-read)))) + +(defun semantic-search--org-id (paragraph &optional default) + (--> + (org-element-map + (org-element-property :parent paragraph) + 'node-property + (lambda (np) + (cons + (org-element-property :key np) + (org-element-property :value np)))) + (assoc 'ID it #'string=) + (cdr it) + (org-string-nw-p it) + (or it default))) + +(defun semantic-search--prepare-paragraph (file-id) + (lambda (paragraph) + (list + :document (substring-no-properties (org-element-interpret-data paragraph)) + :metadata (list :start-point + (org-element-property :begin paragraph) + :node-id + (semantic-search--org-id paragraph file-id))))) + +(defun semantic-search--add-buffer () + (interactive) + (if (eq major-mode 'org-mode) + (-some--> + (org-element-map + (org-element-parse-buffer) + 'paragraph + (semantic-search--prepare-paragraph (org-id-get (point-min) 'create))) + (cl-coerce it 'vector) + ;; (json-serialize it) + ;; (f-write it 'utf-8 "/tmp/out.json") + ;; (message "%S" it) + (semantic-search--connect :insert it)) + (user-error "This only works on org-mode"))) + +(defun semantic-search--roam-data (entries) + (thread-last + (cl-mapcar (lambda (meta) + (alist-get 'node-id meta)) + entries) + (delete-dups) + (vconcat) + (org-roam-db-query [:select [id title file] + :from nodes + :where (in id $v1)]))) + +(defun semantic-search--del-buffer (org-ids) + (interactive (list (org-id-get))) + (unless (null org-ids) + (semantic-search--connect :delete org-ids))) + +(defun semantic-search-pick-org-element () + (when-let ((context (ignore-errors (org-element-context)))) + (filter-buffer-substring (org-element-property :begin context) + (org-element-property :end context)))) + +(defun semantic-search--sync-db () + (org-roam-dolist-with-progress (file (nreverse (org-roam-list-files))) + "importing to semantic search" + (org-roam-with-file file nil + (semantic-search--add-buffer)))) + +;; (semantic-search--sync-db) +(defun semantic-search (text) + (interactive (list (or (semantic-search-pick-org-element) + (read-from-minibuffer "What are you looking for? ")))) + (-let (((&alist 'distances 'documents 'metadatas) + (semantic-search--connect :query text))) + (with-current-buffer (get-buffer-create "*Semantic Search*") + (erase-buffer) + (org-mode) + (insert "#+title: Looking for:\n" text "\n") + (cl-mapc + (lambda (entry-distances entry-document entry-metadatas) + (let ((data (semantic-search--roam-data entry-metadatas))) + (cl-mapc + (lambda (d paragraph meta) + (unless (zerop d) + (-let* ((node-id (alist-get 'node-id meta)) + ((_ title file) (assoc node-id data #'string=)) + (pos + (if file + (with-temp-buffer + (insert-file-contents file) + (line-number-at-pos (or (alist-get 'start-point meta) 1))) + 1))) + (insert + (format "* [[file:%s::%d][%s]]\n" file pos title) + "- Distance :: " (number-to-string d) "\n" + paragraph ?\n) + (org-fill-paragraph)))) + entry-distances entry-document entry-metadatas))) + distances documents metadatas) + (goto-char (point-min)) + (display-buffer (current-buffer))))) + +(provide 'semantic-search) +;;; semantic-search.el ends here -- cgit v1.2.3