diff options
author | Óscar Nájera <hi@oscarnajera.com> | 2022-11-28 15:50:52 +0100 |
---|---|---|
committer | Óscar Nájera <hi@oscarnajera.com> | 2022-11-28 15:50:52 +0100 |
commit | 5168eaf3c7d1808625a3918b0f133bdf26d9c7ec (patch) | |
tree | fdb4493e300ee2ff8d8ba6b006e157b99e47dcc2 | |
parent | 736429c3427b50e64977d7a72c5545a595875567 (diff) | |
download | dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.tar.gz dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.tar.bz2 dotfiles-5168eaf3c7d1808625a3918b0f133bdf26d9c7ec.zip |
import notes from kindle to org file
-rw-r--r-- | bin/kindlenotes2org.py | 54 |
1 files changed, 54 insertions, 0 deletions
#!/usr/bin/env python
# -*- coding: utf-8 -*-
r"""
extract book highlights to orgmode
==================================

Extracts from exported html notes from kindle to org subtree
"""
# Created: Sat Jul 14 02:10:24 2018
# Author: Óscar Nájera
# License: GPL-3

import argparse
from textwrap import fill as filltxt

from bs4 import BeautifulSoup


def html_notes2org(page):
    """Convert a Kindle HTML notes export into Org-mode text.

    Every ``<div>`` in the document is inspected and translated according
    to its CSS classes:

    * ``bookTitle``      -> start of a level-1 heading (``* Title``)
    * ``authors``        -> `` -- Authors`` appended, ending the title line
    * ``sectionHeading`` -> level-2 heading (``** Section``)
    * ``noteText``       -> ``#+begin_quote`` block, wrapped by
      ``textwrap.fill`` at its default width

    Divs carrying none of these classes (or no class at all) are ignored.

    Parameters
    ----------
    page : bytes or str
        Markup of the exported HTML file, handed to BeautifulSoup with the
        ``lxml`` parser.

    Returns
    -------
    str
        The Org-mode subtree text; empty if no matching div was found.
    """
    soup = BeautifulSoup(page, "lxml")

    parts = []
    for div in soup.find_all("div"):
        # A <div> without a class attribute yields None here; fall back to
        # an empty tuple so the membership tests below cannot raise
        # TypeError on such elements.
        classes = div.attrs.get("class") or ()
        text = div.text.strip()

        # Independent ``if`` tests (not ``elif``): a div may carry several
        # classes and each matching one contributes its own output.
        if "bookTitle" in classes:
            # No newline yet: the ``authors`` div completes this line.
            parts.append("* {}".format(text))
        if "authors" in classes:
            parts.append(" -- {}\n".format(text))
        if "sectionHeading" in classes:
            # Terminate the heading explicitly instead of relying on
            # trailing whitespace in the HTML to supply the line break.
            parts.append("** {}\n".format(text))
        if "noteText" in classes:
            parts.append(
                "#+begin_quote\n{}\n#+end_quote\n\n".format(filltxt(text))
            )

    return "".join(parts)


def main():
    """Command-line entry point.

    Reads the HTML export named on the command line, converts it with
    :func:`html_notes2org` and writes the result to the output file
    (``sub.org`` in the current directory unless ``-o/--output`` is given).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("file", help="HTML file from kindle export")
    parser.add_argument(
        "-o",
        "--output",
        default="sub.org",
        help="Org file to write (default: %(default)s)",
    )
    args = parser.parse_args()

    # Read raw bytes and leave decoding to BeautifulSoup.
    with open(args.file, "rb") as fid:
        page = fid.read()

    docs = html_notes2org(page)

    # Explicit UTF-8 so non-ASCII highlights are written identically
    # regardless of the platform's locale encoding.
    with open(args.output, "w", encoding="utf-8") as fid:
        fid.write(docs)


if __name__ == "__main__":
    main()