1 files changed, 0 insertions, 109 deletions
diff --git a/scratch/semgrep/parse.py b/scratch/semgrep/parse.py
deleted file mode 100644
index 3846e3e..0000000
--- a/scratch/semgrep/parse.py
+++ /dev/null
@@ -1,109 +0,0 @@
-#!/usr/bin/env python3
-
-import orgparse
-import pandas as pd
-import re
-
-
-def org_roam_nodes_to_dataframe(org_file):
-    # Load the org file into an OrgData object
-    org_data = orgparse.load(org_file)
-
-    # Define a function to extract the title of a node
-    def extract_title(node):
-        if node.heading:
-            # If the node has a heading, return it
-            return node.heading
-        else:
-            # Otherwise, extract the title from the org file using a regular expression
-            title_pattern = re.compile(r"^#\+title:\s*(.*)$", re.IGNORECASE)
-            match = title_pattern.search(node.body)
-            if match:
-                return match.group(1)
-            else:
-                # If the title is not found, extract it from the first line of the body
-                return re.sub(
-                    r"#\+title:", "", node.body.split("\n")[0], flags=re.IGNORECASE
-                ).strip()
-
-    # Define a function to recursively extract the bodies of a node and its descendants
-    def extract_node_nested_body(node):
-        body = node.body
-        for child in node.children:
-            body += (
-                "\n"
-                + child.level * "*"
-                + " "
-                + child.heading
-                + "\n"
-                + extract_node_nested_body(child)
-            )
-        return body.strip()
-
-    # Define a function to recursively extract the bodies of a node
-    # and its descendants when they are not other nodes
-    def extract_node_nested_body_exclusive(node):
-        body = node.body
-        for child in node.children:
-            if not child.properties.get("ID") and not child.properties.get("SEARCH"):
-                body += (
-                    "\n"
-                    + child.level * "*"
-                    + " "
-                    + child.heading
-                    + "\n"
-                    + extract_node_nested_body_exclusive(child)
-                )
-        return body.strip()
-
-    # Define a function to build the hierarchy of a node
-    def build_node_hierarchy(node):
-        hierarchy = [extract_title(node)]
-        parent = node.parent
-
-        # while parent and parent != org_data[0]:
-        while parent:
-            hierarchy.append(extract_title(parent))
-            parent = parent.parent
-        return " > ".join(reversed(hierarchy)).strip()
-
-    # Define a function to convert a node to a dictionary
-    def node_to_dict(node, file_name):
-        node_dict = {
-            "file_name": file_name,
-            "node_id": node.properties.get("ID"),
-            "node_title": extract_title(node),
-            "node_hierarchy": build_node_hierarchy(node),
-            "node_text": node.body,
-            "node_text_nested": extract_node_nested_body(node),
-            "node_text_nested_exclusive": extract_node_nested_body_exclusive(node),
-        }
-
-        return node_dict
-
-    # Create a list of all org-roam nodes in the OrgData object
-    nodes = [
-        node_to_dict(node, org_file)
-        for node in org_data[0][:]
-        if node.properties.get("ID")
-    ]
-
-    return pd.DataFrame(nodes)
-
-
-model.encode(
-    "What you need is the gist. Your mind needs the repetition to absorb the information. The true learning is on *doing* not in the input. If you can't use the material, you don't know. There is just a limited amount you can learn by listening. The rest you must do yourself. Courses give you ideas, you must figure out what to do with them."
-)
-model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
-
-query_embedding = model.encode("How big is London")
-passage_embedding = model.encode(
-    [
-        "London has 9,787,426 inhabitants at the 2011 census",
-        "London is known for its finacial district",
-        "London is full of criminals",
-        "Cairo is small",
-    ]
-)
-
-print("Similarity:", util.dot_score(query_embedding, passage_embedding))