import notes from kindle to org file

author: Óscar Nájera <hi@oscarnajera.com> 2022-11-28 15:50:52 +0100
committer: Óscar Nájera <hi@oscarnajera.com> 2022-11-28 15:50:52 +0100
commit: 5168eaf3c7d1808625a3918b0f133bdf26d9c7ec (patch)
tree: fdb4493e300ee2ff8d8ba6b006e157b99e47dcc2
parent: 736429c3427b50e64977d7a72c5545a595875567 (diff)
download: dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.tar.gz
dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.tar.bz2
dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.zip
1 files changed, 54 insertions, 0 deletions
diff --git a/bin/kindlenotes2org.py b/bin/kindlenotes2org.py
new file mode 100644
index 0000000..1769c8c
--- /dev/null
+++ b/bin/kindlenotes2org.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+r"""
+extract book highlights to orgmode
+==================================
+
+Extracts from exported html notes from kindle to org subtree
+"""
+# Created: Sat Jul 14 02:10:24 2018
+# Author: Óscar Nájera
+# License: GPL-3
+
+import argparse
+from textwrap import fill as filltxt
+
+from bs4 import BeautifulSoup
+
+
+def html_notes2org(page):
+    """Convert a Kindle HTML notes export into org-mode text.
+
+    Every ``<div>`` in the document is visited in document order and
+    translated according to its CSS class:
+
+    * ``bookTitle``      -> start of a top-level heading (``* Title``)
+    * ``authors``        -> appended to the current line (`` -- Authors``)
+    * ``sectionHeading`` -> second-level heading (``** Section``)
+    * ``noteText``       -> wrapped text inside a ``#+begin_quote`` block
+
+    Divs with any other class, or with no class at all, are ignored.
+
+    NOTE(review): the title line is only terminated by the newline emitted
+    for the ``authors`` div, so this presumably expects every export to
+    contain an ``authors`` div right after ``bookTitle`` -- confirm against
+    real exports.
+
+    Parameters
+    ----------
+    page : str or bytes
+        Content of the exported HTML file, handed to BeautifulSoup as is.
+
+    Returns
+    -------
+    str
+        The org-mode text; empty when no recognised div is found.
+    """
+    soup = BeautifulSoup(page, "lxml")
+
+    # Collect the pieces and join once instead of growing a string with +=.
+    pieces = []
+    for div in soup.find_all("div"):
+        # A div without a class attribute yields None, and ``in None``
+        # raises TypeError; treat it as having no classes.
+        hcl = div.attrs.get("class") or []
+        text = div.text.strip()
+        # Independent checks (not elif): a div may carry several classes.
+        if "bookTitle" in hcl:
+            pieces.append("* {}".format(text))
+        if "authors" in hcl:
+            pieces.append(" -- {}\n".format(text))
+        if "sectionHeading" in hcl:
+            # Always end the heading with a newline so the following quote
+            # block cannot be glued onto the heading line when the HTML text
+            # has no trailing line break.
+            pieces.append("** {}\n".format(text))
+        if "noteText" in hcl:
+            pieces.append(
+                "#+begin_quote\n{}\n#+end_quote\n\n".format(filltxt(text))
+            )
+
+    return "".join(pieces)
+
+
+def main():
+    """Command-line entry point: convert a Kindle HTML export to org-mode.
+
+    Reads the HTML file given as positional argument, converts it with
+    ``html_notes2org`` and writes the result to the file given by
+    ``-o/--output``, which defaults to ``sub.org`` in the current working
+    directory. An existing output file is overwritten.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("file", help="HTML file from kindle export")
+    parser.add_argument(
+        "-o",
+        "--output",
+        default="sub.org",
+        help="org file to write (default: %(default)s)",
+    )
+    args = parser.parse_args()
+
+    # The file is read as raw bytes and passed on undecoded, leaving the
+    # decoding of the document to the HTML parser.
+    with open(args.file, "rb") as fid:
+        page = fid.read()
+
+    docs = html_notes2org(page)
+
+    # Write UTF-8 explicitly: highlights routinely contain non-ASCII
+    # characters and the locale default encoding may not be able to
+    # represent them.
+    with open(args.output, "w", encoding="utf-8") as fid:
+        fid.write(docs)
+
+
+# Run the command-line interface only when executed as a script, not when
+# the module is imported.
+if __name__ == "__main__":
+    main()
author	Óscar Nájera <hi@oscarnajera.com>	2022-11-28 15:50:52 +0100
committer	Óscar Nájera <hi@oscarnajera.com>	2022-11-28 15:50:52 +0100
commit	5168eaf3c7d1808625a3918b0f133bdf26d9c7ec (patch)
tree	fdb4493e300ee2ff8d8ba6b006e157b99e47dcc2
parent	736429c3427b50e64977d7a72c5545a595875567 (diff)
download	dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.tar.gz dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.tar.bz2 dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.zip