about | summary | refs | log | tree | commit | diff | stats
path: root/scratch/semgrep
diff options
context:
space:
mode:
author: Oscar Najera <hi@oscarnajera.com>, 2024-02-04 21:45:42 +0100
committer: Oscar Najera <hi@oscarnajera.com>, 2024-02-04 21:45:42 +0100
commit: d95bddf2af14e8ac5cc147903435a005039c173e (patch)
tree: 7316b21a93222a99a4e560dab2cfed0fe03b6a0e /scratch/semgrep
parent: 62b37b45ddd0acad629dc00e183d57b79f2ccb71 (diff)
download: dotfiles-d95bddf2af14e8ac5cc147903435a005039c173e.tar.gz
          dotfiles-d95bddf2af14e8ac5cc147903435a005039c173e.tar.bz2
          dotfiles-d95bddf2af14e8ac5cc147903435a005039c173e.zip
Rename semgrep to semantic-search
Diffstat (limited to 'scratch/semgrep')
-rw-r--r-- scratch/semgrep/semantic-search.el (renamed from scratch/semgrep/semgrep.el) 76
-rw-r--r-- scratch/semgrep/server.py 12
2 files changed, 53 insertions, 35 deletions
diff --git a/scratch/semgrep/semgrep.el b/scratch/semgrep/semantic-search.el
index fdfcfdb..e1692d0 100644
--- a/scratch/semgrep/semgrep.el
+++ b/scratch/semgrep/semantic-search.el
@@ -1,23 +1,25 @@
-;;; semgrep.el --- Semantic search -*- lexical-binding: t; -*-
+;;; semantic-search.el --- Search for semantic similarity of text -*- lexical-binding: t; -*-
;;
-;; Copyright (C) 2023 Óscar Nájera
+;; Copyright (C) 2024 Óscar Nájera
;;
;; Author: Óscar Nájera <hi@oscarnajera.com>
;; Maintainer: Óscar Nájera <hi@oscarnajera.com>
-;; Created: November 07, 2023
-;; Modified: November 07, 2023
-;; Version: 0.0.1
-;; Keywords: abbrev bib c calendar comm convenience data docs emulations extensions faces files frames games hardware help hypermedia i18n internal languages lisp local maint mail matching mouse multimedia news outlines processes terminals tex tools unix vc wp
-;; Homepage: https://github.com/titan/semgrep
+;; Created: February 04, 2024
+;; Modified: February 04, 2024
+;; Version: 0.1.0
+;; Keywords: convenience data docs files hypermedia i18n matching tools
+;; Homepage: https://github.com/titan/semantic-search
;; Package-Requires: ((emacs "27.1"))
;;
;; This file is not part of GNU Emacs.
;;
;;; Commentary:
;;
-;; semantically search on my database by paragraph
+;; Search for semantic similarity of documents at a paragraph level
;;
;;; Code:
+
+
(require 'url)
(require 'org-element)
(require 'org-roam-db)
@@ -26,23 +28,24 @@
;; Silence byte-compiler.
(defvar url-http-end-of-headers)
-(defcustom semgrep-server-url "http://localhost:8080"
- "Address where the Python server with the chromadb is listening."
+(defcustom semantic-search-server-url "http://localhost:8080"
+ "Address where the Chromadb server is listening."
:type 'url
- :group 'semgrep)
+ :group 'semantic-search)
-(defun semgrep--connect (method data)
+(defun semantic-search--connect (method data)
+ "Synchronous query to the server."
(let ((url-request-method "POST")
(url-request-extra-headers '(("Content-Type" . "application/json")))
(url-request-data (encode-coding-string
(json-serialize `(,method ,data))
'utf-8)))
(with-current-buffer
- (url-retrieve-synchronously semgrep-server-url)
+ (url-retrieve-synchronously semantic-search-server-url)
(goto-char url-http-end-of-headers)
(json-read))))
-(defun semgrep--get-node-id (paragraph &optional default)
+(defun semantic-search--org-id (paragraph &optional default)
(-->
(org-element-map
(org-element-property :parent paragraph)
@@ -56,31 +59,31 @@
(org-string-nw-p it)
(or it default)))
-(defun semgrep--prepare-paragraph (file-id)
+(defun semantic-search--prepare-paragraph (file-id)
(lambda (paragraph)
(list
:document (substring-no-properties (org-element-interpret-data paragraph))
:metadata (list :start-point
(org-element-property :begin paragraph)
:node-id
- (semgrep--get-node-id paragraph file-id)))))
+ (semantic-search--org-id paragraph file-id)))))
-(defun semgrep--add-buffer ()
+(defun semantic-search--add-buffer ()
(interactive)
(if (eq major-mode 'org-mode)
(-some-->
(org-element-map
(org-element-parse-buffer)
'paragraph
- (semgrep--prepare-paragraph (org-id-get (point-min) 'create)))
+ (semantic-search--prepare-paragraph (org-id-get (point-min) 'create)))
(cl-coerce it 'vector)
;; (json-serialize it)
;; (f-write it 'utf-8 "/tmp/out.json")
;; (message "%S" it)
- (semgrep--connect :insert it))
+ (semantic-search--connect :insert it))
(user-error "This only works on org-mode")))
-(defun semgrep--roam-data (entries)
+(defun semantic-search--roam-data (entries)
(thread-last
(cl-mapcar (lambda (meta)
(alist-get 'node-id meta))
@@ -91,39 +94,46 @@
:from nodes
:where (in id $v1)])))
-(defun semgrep-pick-org-element ()
+(defun semantic-search--del-buffer (org-ids)
+ (interactive (list (org-id-get)))
+ (unless (null org-ids)
+ (semantic-search--connect :delete org-ids)))
+
+(defun semantic-search-pick-org-element ()
(when-let ((context (ignore-errors (org-element-context))))
(filter-buffer-substring (org-element-property :begin context)
(org-element-property :end context))))
-(defun semgrep--sync-db ()
+(defun semantic-search--sync-db ()
(org-roam-dolist-with-progress (file (nreverse (org-roam-list-files)))
"importing to semantic search"
(org-roam-with-file file nil
- (semgrep--add-buffer))))
+ (semantic-search--add-buffer))))
-;; (semgrep--sync-db)
-(defun semgrep-search (text)
- (interactive (list (or (semgrep-pick-org-element)
+;; (semantic-search--sync-db)
+(defun semantic-search (text)
+ (interactive (list (or (semantic-search-pick-org-element)
(read-from-minibuffer "What are you looking for? "))))
(-let (((&alist 'distances 'documents 'metadatas)
- (semgrep--connect :query text)))
+ (semantic-search--connect :query text)))
(with-current-buffer (get-buffer-create "*Semantic Search*")
(erase-buffer)
(org-mode)
(insert "#+title: Looking for:\n" text "\n")
(cl-mapc
(lambda (entry-distances entry-document entry-metadatas)
- (let ((data (semgrep--roam-data entry-metadatas)))
+ (let ((data (semantic-search--roam-data entry-metadatas)))
(cl-mapc
(lambda (d paragraph meta)
(unless (zerop d)
(-let* ((node-id (alist-get 'node-id meta))
((_ title file) (assoc node-id data #'string=))
(pos
- (with-temp-buffer
- (insert-file-contents file)
- (line-number-at-pos (or (alist-get 'start-point meta) 1)))))
+ (if file
+ (with-temp-buffer
+ (insert-file-contents file)
+ (line-number-at-pos (or (alist-get 'start-point meta) 1)))
+ 1)))
(insert
(format "* [[file:%s::%d][%s]]\n" file pos title)
"- Distance :: " (number-to-string d) "\n"
@@ -134,5 +144,5 @@
(goto-char (point-min))
(display-buffer (current-buffer)))))
-(provide 'semgrep)
-;;; semgrep.el ends here
+(provide 'semantic-search)
+;;; semantic-search.el ends here
diff --git a/scratch/semgrep/server.py b/scratch/semgrep/server.py
index 6f3ebcd..becabbb 100644
--- a/scratch/semgrep/server.py
+++ b/scratch/semgrep/server.py
@@ -23,6 +23,11 @@ def ensure_list(data):
raise ValueError("Data must be a list of strings")
+def delete_nodes(nodes):
+ for node in nodes:
+ collection.delete(where={"node-id": node})
+
+
class MyRequestHandler(BaseHTTPRequestHandler):
def do_POST(self):
content_length = int(self.headers["Content-Length"])
@@ -31,6 +36,7 @@ class MyRequestHandler(BaseHTTPRequestHandler):
try:
data = json.loads(post_data)
response_message = f"Received POST request with data: '{data}'\n"
+ self.log_message(response_message)
except ValueError:
response_message = "Invalid JSON data"
self.send_response(400)
@@ -38,12 +44,14 @@ class MyRequestHandler(BaseHTTPRequestHandler):
if query := data.get("query"):
self.log_message("Processing query '%s'", query.replace("\n", " ").strip())
response = collection.query(query_texts=ensure_list(query))
+ elif delete_set := data.get("delete"):
+ delete_nodes(ensure_list(delete_set))
+ response = f"Deleted nodes {delete_set}"
elif paragraphs := data.get("insert"):
data, metadata = drop_duplicates(paragraphs)
nodes = set(m.get("node-id") for m in metadata)
self.log_message("Processing metadata %s", nodes)
- for node in nodes:
- collection.delete(where={"node-id": node})
+ delete_nodes(nodes)
collection.add(
documents=data, metadatas=metadata, ids=list(map(checksum, data))
)