;;; semgrep.el --- Semantic search -*- lexical-binding: t; -*-
;;
;; Copyright (C) 2023 Óscar Nájera
;;
;; Author: Óscar Nájera
;; Maintainer: Óscar Nájera
;; Created: November 07, 2023
;; Modified: November 07, 2023
;; Version: 0.0.1
;; Keywords: abbrev bib c calendar comm convenience data docs emulations extensions faces files frames games hardware help hypermedia i18n internal languages lisp local maint mail matching mouse multimedia news outlines processes terminals tex tools unix vc wp
;; Homepage: https://github.com/titan/semgrep
;; Package-Requires: ((emacs "27.1"))
;;
;; This file is not part of GNU Emacs.
;;
;;; Commentary:
;;
;; Semantically search my note database, paragraph by paragraph.
;;
;; Org paragraphs are sent to a local HTTP server for storage, and free
;; text queries are answered with the closest stored paragraphs.
;;
;; Provenance: this file was added as scratch/semgrep/semgrep.el by
;; commit dc45b56759e38da793f8976c94b6a798096c968e
;; (Oscar Najera, Wed, 29 Nov 2023 04:21:37 +0100,
;; "Semantic search server and client").
;;
;;; Code:
(require 'cl-lib)
(require 'json)
(require 'subr-x)
(require 'url)
(require 'url-http)
(require 'org-element)
(require 'org-id)
(require 'org-roam-db)
(require 'dash)

(defvar semgrep-server-url "http://localhost:8080"
  "Address of the semantic search server used by `semgrep--connect'.")

(defun semgrep--connect (method data)
  "POST a JSON object mapping METHOD to DATA to `semgrep-server-url'.
METHOD is a keyword (this file uses :store and :query) and DATA is
any value `json-serialize' accepts.  Return the response body parsed
as JSON, in the representation `json-read' produces.  Signal an error
when the server returns no response buffer."
  (let* ((url-request-method "POST")
         (url-request-extra-headers '(("Content-Type" . "application/json")))
         (url-request-data (encode-coding-string
                            (json-serialize `(,method ,data))
                            'utf-8))
         (response (url-retrieve-synchronously semgrep-server-url)))
    (unless (buffer-live-p response)
      (error "Semgrep: no response from %s" semgrep-server-url))
    ;; The response buffer is ours to dispose of; kill it even when
    ;; parsing fails so repeated calls do not pile up hidden buffers.
    (unwind-protect
        (with-current-buffer response
          (goto-char url-http-end-of-headers)
          ;; The buffer holds raw bytes; decode them so non-ASCII text
          ;; comes back as proper characters.
          (json-read-from-string
           (decode-coding-string
            (buffer-substring-no-properties (point) (point-max))
            'utf-8)))
      (kill-buffer response))))

(defun semgrep--get-node-id (paragraph &optional default)
  "Return the ID property found in the parent element of PARAGRAPH.
Only node properties whose key is ID (compared case-insensitively)
are considered.  Return DEFAULT when there is no such property or
its value is blank."
  (or (org-string-nw-p
       (org-element-map
           (org-element-property :parent paragraph)
           'node-property
         (lambda (np)
           ;; A drawer may list other properties before the ID, so
           ;; match on the key instead of taking the first value.
           (when (string= "ID" (upcase (org-element-property :key np)))
             (org-element-property :value np)))
         nil t))
      default))

(defun semgrep--prepare-paragraph (file-id)
  "Return a function converting an Org paragraph element to a record.
The record is a plist with :document, the paragraph text without
text properties, and :metadata, a plist holding :start-point (the
paragraph's :begin position) and :node-id.  The node id comes from
`semgrep--get-node-id' with FILE-ID as the fallback; when neither is
available the :node-id key is left out."
  (lambda (paragraph)
    (let ((node-id (semgrep--get-node-id paragraph file-id)))
      (list
       :document (substring-no-properties (org-element-interpret-data paragraph))
       :metadata (append
                  (list :start-point (org-element-property :begin paragraph))
                  ;; `json-serialize' turns nil into an empty object, so
                  ;; omit the key rather than send a bogus value.
                  (when node-id
                    (list :node-id node-id)))))))

(defun semgrep--add-buffer ()
  "Send every paragraph of the current Org buffer to the server.
Paragraphs are converted with `semgrep--prepare-paragraph' and posted
as one vector through the :store method.  Nothing is sent when the
buffer has no paragraphs.  Signal a `user-error' outside Org mode."
  (interactive)
  (unless (derived-mode-p 'org-mode)
    (user-error "This only works on org-mode"))
  (when-let* ((records (org-element-map
                           (org-element-parse-buffer)
                           'paragraph
                         (semgrep--prepare-paragraph (org-id-get (point-min))))))
    (semgrep--connect :store (vconcat records))))

(defun semgrep--roam-data (entries)
  "Return (ID TITLE) rows from the Org-roam database for ENTRIES.
ENTRIES is a sequence of metadata alists; the node-id entry of each
one selects the nodes to look up.  Return nil without querying when
no entry carries a node id."
  (when-let* ((ids (delete-dups
                    (delq nil
                          (mapcar (lambda (meta)
                                    (alist-get 'node-id meta))
                                  entries)))))
    (org-roam-db-query [:select [id title]
                        :from nodes
                        :where (in id $v1)]
                       (vconcat ids))))

;;;###autoload
(defun semgrep-search (text)
  "Query the server for paragraphs similar to TEXT and display them.
Interactively, TEXT is the paragraph at point, or a string read from
the minibuffer when point is not on any text.  Results are written to
the buffer *Semantic Search* in Org format, one heading per match
linking to its node; matches at distance zero are skipped."
  (interactive (list (or (org-string-nw-p (thing-at-point 'paragraph t))
                         (read-from-minibuffer "What are you looking for? "))))
  (-let (((&alist 'distances 'documents 'metadatas)
          (semgrep--connect :query text)))
    (with-current-buffer (get-buffer-create "*Semantic Search*")
      (erase-buffer)
      (org-mode)
      (insert "#+title: Looking for:\n" text "\n")
      (cl-mapc
       (lambda (entry-distances entry-document entry-metadatas)
         (let ((data (semgrep--roam-data entry-metadatas)))
           (cl-mapc
            (lambda (d paragraph meta)
              (let* ((node-id (or (alist-get 'node-id meta) ""))
                     (title (cadr (assoc node-id data #'string=))))
                (unless (zerop d)
                  (insert
                   ;; Fall back to the raw id when the node is unknown to
                   ;; the roam database, instead of a literal "nil" title.
                   (cond
                    (title (format "* [[id:%s][%s]]\n" node-id title))
                    ((string-empty-p node-id) "* (no node id)\n")
                    (t (format "* [[id:%s][%s]]\n" node-id node-id)))
                   "- Distance :: " (number-to-string d) "\n"
                   "- point :: " (number-to-string (or (alist-get 'start-point meta) -1)) "\n"
                   (string-trim paragraph) ?\n))))
            entry-distances entry-document entry-metadatas)))
       distances documents metadatas)
      (goto-char (point-min))
      (display-buffer (current-buffer)))))

;; (org-roam-dolist-with-progress (file (org-roam-list-files))
;;     "importing to semantic search"
;;   (org-roam-with-file file nil
;;     (semgrep--add-buffer)))

(provide 'semgrep)
;;; semgrep.el ends here