├── roam_to_git ├── __init__.py ├── fs.py ├── __main__.py ├── formatter.py └── scrapping.py ├── setup.cfg ├── .gitignore ├── requirements.txt ├── env.template ├── .github ├── workflows │ └── test.yml └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── LICENSE.txt ├── setup.py ├── tests.py └── README.md /roam_to_git/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # Inside of setup.cfg 2 | [metadata] 3 | description-file = README.md 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | logs 2 | notes 3 | downloads 4 | venv/ 5 | env/ 6 | **.pyc 7 | .env 8 | .mypy_cache/ 9 | MANIFEST 10 | dist/ 11 | *.ipynb 12 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # WARNING: don't forget to update setup.py 2 | gitpython>=3.1.* 3 | loguru==0.4.* 4 | pyppeteer>=0.0.25 5 | python-dotenv>=0.10.* 6 | psutil>=5.6.0 7 | -------------------------------------------------------------------------------- /env.template: -------------------------------------------------------------------------------- 1 | # Copy this file to ".env" and fill the values, or configure it on Github secrets if using Github actions 2 | ROAMRESEARCH_USER="YOUR_EMAIL" 3 | ROAMRESEARCH_PASSWORD="YOUR_PASSWORD" 4 | # find it here https://user-images.githubusercontent.com/656694/84388282-98136800-abf4-11ea-84c1-85ffc59b30b0.png 5 | ROAMRESEARCH_DATABASE="YOUR_DATABASE" 6 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: "roam-to-git tests.py" 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | - dev 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | name: Test 13 | timeout-minutes: 15 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python 3.8 17 | uses: actions/setup-python@v1 18 | with: 19 | python-version: 3.8 20 | 21 | - name: Setup dependencies 22 | run: | 23 | pip install -r requirements.txt 24 | pip install mypy 25 | - name: Run backup 26 | run: ./tests.py 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: MatthieuBizien 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | Copyright (c) 2018 YOUR NAME 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 17 | SOFTWARE. 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: MatthieuBizien 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Traceback** 24 | Please use http://gist.github.com/ or similar, and report the last line here. 25 | 26 | **Run `roam-to-git --debug notes/` and report what you get.** 27 | It should open a Chrome front-end and do the scraping. The repository content will not be modified. If applicable, add screenshots to help explain your problem. 28 | 29 | **Please complete the following information:** 30 | - OS: [e.g. macOS, Linux] 31 | - Do you use Github Actions? 32 | - Do you use multiple Roam databases? 33 | - Did roam-to-git use to work for you? When precisely did it stop working? 34 | - Are some backup runs still working? 35 | 36 | **Additional context** 37 | Add any other context about the problem here.
38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | from pkg_resources import parse_requirements 4 | 5 | setup( 6 | name='roam_to_git', 7 | packages=['roam_to_git'], 8 | version='0.1', 9 | license='MIT', 10 | description='Automatic RoamResearch backup to Git', 11 | author='Matthieu Bizien', # Type in your name 12 | author_email='oao2005@gmail.com', # Type in your E-Mail 13 | url='https://github.com/MatthieuBizien/roam-to-git', 14 | download_url='https://github.com/MatthieuBizien/roam-to-git/archive/v0.1.tar.gz', 15 | keywords=['ROAMRESEARCH', 'GIT', 'BACKUP'], 16 | install_requires=[str(requirement) for requirement in 17 | parse_requirements(open("requirements.txt"))], 18 | classifiers=[ 19 | 'Development Status :: 3 - Alpha', 20 | 'Intended Audience :: Developers', 21 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: Wiki', 22 | 'License :: OSI Approved :: MIT License', 23 | 'Programming Language :: Python :: 3', 24 | 'Programming Language :: Python :: 3.6', 25 | 'Programming Language :: Python :: 3.7', 26 | 'Programming Language :: Python :: 3.8', 27 | ], 28 | entry_points={ 29 | 'console_scripts': ['roam-to-git=roam_to_git.__main__:main'], 30 | } 31 | ) 32 | -------------------------------------------------------------------------------- /roam_to_git/fs.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import zipfile 4 | from pathlib import Path 5 | from typing import List, Dict 6 | 7 | import git 8 | from loguru import logger 9 | 10 | 11 | def get_zip_path(zip_dir_path: Path) -> Path: 12 | """Return the path to the single zip file in a directory, and fail if there is not one single 13 | zip file""" 14 | zip_files = list(zip_dir_path.iterdir()) 15 | zip_files = [f for f in zip_files if f.name.endswith(".zip")] 16 | assert len(zip_files) == 1, (zip_files, zip_dir_path) 17 | zip_path, = zip_files 18 | return zip_path 19 | 20 | 21 | def reset_git_directory(git_path: Path, skip=(".git",)): 22 | """Remove all files in a git directory""" 23 | to_remove: List[Path] = [] 24 | for file in git_path.glob("**/*"): 25 | if any(skip_item in file.parts for skip_item in skip): 26 | continue 27 | to_remove.append(file) 28 | # Now we remove starting from the end to remove childs before parents 29 | to_remove = sorted(set(to_remove))[::-1] 30 | for file in to_remove: 31 | if file.is_file(): 32 | file.unlink() 33 | elif file.is_dir(): 34 | if list(file.iterdir()): 35 | logger.debug("Impossible to remove directory {}", file) 36 | else: 37 | file.rmdir() 38 | 39 | 40 | def unzip_markdown_archive(zip_dir_path: Path): 41 | zip_path = get_zip_path(zip_dir_path) 42 | with zipfile.ZipFile(zip_path) as zip_file: 43 | contents = {file.filename: zip_file.read(file.filename).decode() 44 | for file in zip_file.infolist() 45 | if not file.is_dir()} 46 | return contents 47 | 48 | 49 | def save_markdowns(directory: Path, contents: Dict[str, str]): 50 | logger.debug("Saving markdown to {}", directory) 51 | # Format and write the markdown files 52 | for file_name, content in contents.items(): 53 | dest = (directory / file_name) 54 | dest.parent.mkdir(parents=True, exist_ok=True) # Needed if a new directory is used 55 | # We have to specify encoding because crontab on Mac don't use UTF-8 56 | # https://stackoverflow.com/questions/11735363/python3-unicodeencodeerror-crontab 57 | 
with dest.open("w", encoding="utf-8") as f: 58 | f.write(content) 59 | 60 | 61 | def unzip_and_save_json_archive(zip_dir_path: Path, directory: Path): 62 | logger.debug("Saving json to {}", directory) 63 | directory.mkdir(exist_ok=True) 64 | zip_path = get_zip_path(zip_dir_path) 65 | with zipfile.ZipFile(zip_path) as zip_file: 66 | files = list(zip_file.namelist()) 67 | for file in files: 68 | assert file.endswith(".json") 69 | content = json.loads(zip_file.read(file).decode()) 70 | with open(directory / file, "w") as f: 71 | json.dump(content, f, sort_keys=True, indent=2, ensure_ascii=True) 72 | 73 | 74 | def commit_git_directory(repo: git.Repo): 75 | """Add an automatic commit in a git directory if it has changed, and push it""" 76 | if not repo.is_dirty() and not repo.untracked_files: 77 | # No change, nothing to do 78 | return 79 | logger.debug("Committing git repository {}", repo.git_dir) 80 | repo.git.add(A=True) # https://github.com/gitpython-developers/GitPython/issues/292 81 | repo.index.commit(f"Automatic commit {datetime.datetime.now().isoformat()}") 82 | 83 | 84 | def push_git_repository(repo: git.Repo): 85 | logger.debug("Pushing to origin") 86 | origin = repo.remote(name='origin') 87 | origin.push() 88 | -------------------------------------------------------------------------------- /roam_to_git/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | import os 4 | import sys 5 | import tempfile 6 | import time 7 | from pathlib import Path 8 | 9 | import git 10 | from dotenv import load_dotenv 11 | from loguru import logger 12 | 13 | from roam_to_git.formatter import read_markdown_directory, format_markdown 14 | from roam_to_git.fs import reset_git_directory, unzip_markdown_archive, \ 15 | unzip_and_save_json_archive, commit_git_directory, push_git_repository, save_markdowns 16 | from roam_to_git.scrapping import patch_pyppeteer, scrap, Config 17 | 18 | 19 | @logger.catch(reraise=True) 20 | def main(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument("directory", default=None, nargs="?", 23 | help="Directory where your notes are stored. Defaults to notes/") 24 | parser.add_argument("--debug", action="store_true", 25 | help="Help debug by opening the browser in the foreground. Note that the " 26 | "git repository will not be updated with that option.") 27 | parser.add_argument("--database", default=None, 28 | help="If you have multiple Roam databases, select the one you want to save. " 29 | "Can also be configured with the env variable ROAMRESEARCH_DATABASE.") 30 | parser.add_argument("--skip-git", action="store_true", 31 | help="Consider the repository as just a directory, and don't do any " 32 | "git-related action.") 33 | parser.add_argument("--skip-push", action="store_true", 34 | help="Don't git push after commit.") 35 | parser.add_argument("--skip-fetch", action="store_true", 36 | help="Do not download the data from Roam, just update the formatting.") 37 | parser.add_argument("--sleep-duration", type=float, default=2., 38 | help="Duration to wait for the interface. We wait 100x that duration for " 39 | "Roam to load. 
Increase it if Roam servers are slow, but be careful" 40 | "with the free tier of Github Actions.") 41 | args = parser.parse_args() 42 | 43 | patch_pyppeteer() 44 | if args.directory is None: 45 | git_path = Path("notes").absolute() 46 | else: 47 | git_path = Path(args.directory).absolute() 48 | 49 | if (git_path / ".env").exists(): 50 | logger.info("Loading secrets from {}", git_path / ".env") 51 | load_dotenv(git_path / ".env", override=True) 52 | else: 53 | logger.debug("No secret found at {}", git_path / ".env") 54 | if "ROAMRESEARCH_USER" not in os.environ or "ROAMRESEARCH_PASSWORD" not in os.environ: 55 | logger.error("Please define ROAMRESEARCH_USER and ROAMRESEARCH_PASSWORD, " 56 | "in the .env file of your notes repository, or in environment variables") 57 | sys.exit(1) 58 | config = Config(args.database, debug=args.debug, sleep_duration=float(args.sleep_duration)) 59 | 60 | if args.skip_git: 61 | repo = None 62 | else: 63 | repo = git.Repo(git_path) 64 | assert not repo.bare # Fail fast if it's not a repo 65 | 66 | reset_git_directory(git_path / "formatted") 67 | if not args.skip_fetch: 68 | reset_git_directory(git_path / "json") 69 | reset_git_directory(git_path / "markdown") 70 | 71 | with tempfile.TemporaryDirectory() as markdown_zip_path, \ 72 | tempfile.TemporaryDirectory() as json_zip_path: 73 | markdown_zip_path = Path(markdown_zip_path) 74 | json_zip_path = Path(json_zip_path) 75 | 76 | scrap(markdown_zip_path, json_zip_path, config) 77 | if config.debug: 78 | logger.debug("waiting for the download...") 79 | time.sleep(20) 80 | return 81 | raws = unzip_markdown_archive(markdown_zip_path) 82 | save_markdowns(git_path / "markdown", raws) 83 | unzip_and_save_json_archive(json_zip_path, git_path / "json") 84 | 85 | formatted = format_markdown(read_markdown_directory(git_path / "markdown")) 86 | save_markdowns(git_path / "formatted", formatted) 87 | 88 | if repo is not None: 89 | commit_git_directory(repo) 90 | if not args.skip_push: 91 | push_git_repository(repo) 92 | 93 | 94 | if __name__ == "__main__": 95 | main() 96 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import unittest 3 | from pathlib import Path 4 | from typing import List 5 | 6 | import mypy.api 7 | 8 | from roam_to_git.formatter import extract_links, format_link, format_to_do 9 | 10 | 11 | class TestFormatTodo(unittest.TestCase): 12 | def test_empty(self): 13 | self.assertEqual(format_to_do(""), "") 14 | 15 | def test_no_link(self): 16 | self.assertEqual(format_to_do("string"), "string") 17 | 18 | def test_to_do(self): 19 | self.assertEqual(format_to_do("a\n- {{[[TODO]]}}string"), "a\n- [ ] string") 20 | 21 | def test_done(self): 22 | self.assertEqual(format_to_do("a\n- {{[[DONE]]}}string"), "a\n- [x] string") 23 | 24 | def test_something_else(self): 25 | self.assertEqual(format_to_do("a\n- {{[[ZZZ]]}}string"), "a\n- {{[[ZZZ]]}}string") 26 | 27 | 28 | class TestFormatLinks(unittest.TestCase): 29 | """Test that we correctly format the links""" 30 | 31 | def test_empty(self): 32 | self.assertEqual(format_link(""), "") 33 | 34 | def test_no_link(self): 35 | self.assertEqual(format_link("string"), "string") 36 | 37 | def test_one_link(self): 38 | self.assertEqual(format_link("string [[link]]."), "string [link]().") 39 | 40 | def test_one_link_prefix(self): 41 | self.assertEqual(format_link("string [[link]].", link_prefix="../../"), 42 | "string 
[link](<../../link.md>).") 43 | 44 | def test_two_links(self): 45 | self.assertEqual(format_link("[[link]] [[other]]"), 46 | "[link]() [other]()") 47 | 48 | def test_one_hashtag(self): 49 | self.assertEqual(format_link("string #link."), "string [link]().") 50 | 51 | def test_two_hashtag(self): 52 | self.assertEqual(format_link("#link #other"), 53 | "[link]() [other]()") 54 | 55 | def test_attribute(self): 56 | self.assertEqual(format_link(" - string:: link"), " - **[string]():** link") 57 | 58 | def test_attribute_then_attribute_like(self): 59 | self.assertEqual(format_link("- attrib:: string:: val"), 60 | "- **[attrib]():** string:: val") 61 | 62 | def test_attribute_with_colon(self): 63 | self.assertEqual(format_link("- attrib:is:: string"), 64 | "- **[attrib:is]():** string") 65 | 66 | def test_attribute_new_line(self): 67 | self.assertEqual(format_link(" - attrib:: string\n " 68 | "- attrib:: string"), 69 | " - **[attrib]():** string\n " 70 | " - **[attrib]():** string") 71 | 72 | def _extract_links(string) -> List[str]: 73 | return [m.group(1) for m in extract_links(string)] 74 | 75 | 76 | class TestExtractLinks(unittest.TestCase): 77 | """Test that we correctly extract the links, for backreference""" 78 | def test_empty(self): 79 | self.assertEqual(_extract_links(""), []) 80 | 81 | def test_no_link(self): 82 | self.assertEqual(_extract_links("string"), []) 83 | 84 | def test_one_link(self): 85 | self.assertEqual(_extract_links("string [[link]]."), ["link"]) 86 | 87 | def test_two_links(self): 88 | self.assertEqual(_extract_links("[[link]] [[other]]"), ["link", "other"]) 89 | 90 | def test_one_hashtag(self): 91 | self.assertEqual(_extract_links("string [[link]]."), ["link"]) 92 | 93 | def test_two_hashtag(self): 94 | self.assertEqual(_extract_links("[[link]] [[other]]"), ["link", "other"]) 95 | 96 | def test_no_attribute(self): 97 | self.assertEqual(_extract_links(" - string: link"), []) 98 | 99 | def test_attribute(self): 100 | self.assertEqual(_extract_links(" - attrib:: link"), ["attrib"]) 101 | 102 | def test_attribute_then_attribute_like(self): 103 | self.assertEqual(_extract_links("- attrib:: link:: val"), ["attrib"]) 104 | 105 | def test_attribute_with_colon(self): 106 | self.assertEqual(_extract_links("- attrib:is:: link"), ["attrib:is"]) 107 | 108 | def test_attribute_new_line(self): 109 | self.assertEqual(_extract_links(" - attrib:: link\n " 110 | "- attrib2:: link"), 111 | ["attrib", "attrib2"]) 112 | 113 | 114 | class TestMypy(unittest.TestCase): 115 | def _test_mypy(self, files: List[str]): 116 | stdout, stderr, exit_status = mypy.api.run(["--ignore-missing-imports", *files]) 117 | self.assertEqual(exit_status, 0) 118 | 119 | def test_mypy_rtg(self): 120 | self._test_mypy(["roam_to_git"]) 121 | 122 | def test_mypy_rtg_and_tests(self): 123 | self._test_mypy(["roam_to_git", "tests.py"]) 124 | 125 | def test_mypy_all(self): 126 | self._test_mypy([str(f) for f in Path(__file__).parent.iterdir() 127 | if f.is_file() and f.name.endswith(".py")]) 128 | 129 | 130 | if __name__ == "__main__": 131 | unittest.main() 132 | -------------------------------------------------------------------------------- /roam_to_git/formatter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from collections import defaultdict 4 | from itertools import takewhile 5 | from pathlib import Path 6 | from typing import Dict, List, Match, Tuple 7 | 8 | 9 | def read_markdown_directory(raw_directory: Path) -> Dict[str, str]: 10 | contents = {} 
11 | for file in raw_directory.iterdir(): 12 | if file.is_dir(): 13 | # We recursively add the content of sub-directories. 14 | # They exist when there is a / in the note name. 15 | for child_name, content in read_markdown_directory(file).items(): 16 | contents[f"{file.name}/{child_name}"] = content 17 | if not file.is_file(): 18 | continue 19 | with file.open(encoding="utf-8") as f: 20 | content = f.read() 21 | parts = file.parts[len(raw_directory.parts):] 22 | file_name = os.path.join(*parts) 23 | contents[file_name] = content 24 | return contents 25 | 26 | 27 | def get_back_links(contents: Dict[str, str]) -> Dict[str, List[Tuple[str, Match]]]: 28 | # Extract backlinks from the markdown 29 | forward_links = {file_name: extract_links(content) for file_name, content in contents.items()} 30 | back_links: Dict[str, List[Tuple[str, Match]]] = defaultdict(list) 31 | for file_name, links in forward_links.items(): 32 | for link in links: 33 | back_links[f"{link.group(1)}.md"].append((file_name, link)) 34 | return back_links 35 | 36 | 37 | def format_markdown(contents: Dict[str, str]) -> Dict[str, str]: 38 | back_links = get_back_links(contents) 39 | # Format the markdown files 40 | out = {} 41 | for file_name, content in contents.items(): 42 | # We add the backlinks first, because they use the positions of the characters 43 | # of the regex matches 44 | content = add_back_links(content, back_links[file_name]) 45 | 46 | # Format content. Backlinks content will be formatted automatically. 47 | content = format_to_do(content) 48 | link_prefix = "../" * file_name.count("/") 49 | content = format_link(content, link_prefix=link_prefix) 50 | if len(content) > 0: 51 | out[file_name] = content 52 | 53 | return out 54 | 55 | 56 | def format_to_do(contents: str): 57 | contents = re.sub(r"{{\[\[TODO\]\]}} *", r"[ ] ", contents) 58 | contents = re.sub(r"{{\[\[DONE\]\]}} *", r"[x] ", contents) 59 | return contents 60 | 61 | 62 | def extract_links(string: str) -> List[Match]: 63 | out = list(re.finditer(r"\[\[" 64 | r"([^\]\n]+)" 65 | r"\]\]", string)) 66 | # Match attributes 67 | out.extend(re.finditer(r"(?:^|\n) *- " 68 | r"((?:[^:\n]|:[^:\n])+)" # Match everything except :: 69 | r"::", string)) 70 | return out 71 | 72 | 73 | def add_back_links(content: str, back_links: List[Tuple[str, Match]]) -> str: 74 | if not back_links: 75 | return content 76 | files = sorted(set((file_name[:-3], match) for file_name, match in back_links), 77 | key=lambda e: (e[0], e[1].start())) 78 | new_lines = [] 79 | file_before = None 80 | for file, match in files: 81 | if file != file_before: 82 | new_lines.append(f"## [{file}](<{file}.md>)") 83 | file_before = file 84 | 85 | start_context_ = list(takewhile(lambda c: c != "\n", match.string[:match.start()][::-1])) 86 | start_context = "".join(start_context_[::-1]) 87 | 88 | middle_context = match.string[match.start():match.end()] 89 | 90 | end_context_ = takewhile(lambda c: c != "\n", match.string[match.end():]) 91 | end_context = "".join(end_context_) 92 | 93 | context = (start_context + middle_context + end_context).strip() 94 | new_lines.extend([context, ""]) 95 | backlinks_str = "\n".join(new_lines) 96 | return f"{content}\n# Backlinks\n{backlinks_str}\n" 97 | 98 | 99 | def format_link(string: str, link_prefix="") -> str: 100 | """Transform a RoamResearch-like link to a Markdown link. 101 | 102 | @param link_prefix: Add the given prefix before all links. 103 | WARNING: not robust to special characters.
104 | """ 105 | # Regex are read-only and can't parse [[[[recursive]] [[links]]]], but they do the job. 106 | # We use a special syntax for links that can have SPACES in them 107 | # Format internal reference: [[mynote]] 108 | string = re.sub(r"\[\[" # We start with [[ 109 | # TODO: manage a single ] in the tag 110 | r"([^\]\n]+)" # Everything except ] 111 | r"\]\]", 112 | rf"[\1](<{link_prefix}\1.md>)", 113 | string, flags=re.MULTILINE) 114 | 115 | # Format hashtags: #mytag 116 | string = re.sub(r"#([a-zA-Z-_0-9]+)", 117 | rf"[\1](<{link_prefix}\1.md>)", 118 | string, flags=re.MULTILINE) 119 | 120 | # Format attributes 121 | string = re.sub(r"(^ *- )" # Match the beginning, like ' - ' 122 | r"(([^:\n]|:[^:\n])+)" # Match everything except :: 123 | r"::", 124 | rf"\1**[\2](<{link_prefix}\2.md>):**", # Format Markdown link 125 | string, flags=re.MULTILINE) 126 | return string 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Automatic RoamResearch backup 2 | 3 | [![Roam Research backup](https://github.com/MatthieuBizien/roam-to-git-demo/workflows/Roam%20Research%20backup/badge.svg)](https://github.com/MatthieuBizien/roam-to-git-demo/actions) 4 | [![roam-to-git tests.py](https://github.com/MatthieuBizien/roam-to-git/workflows/roam-to-git%20tests.py/badge.svg)](https://github.com/MatthieuBizien/roam-to-git/actions) 5 | 6 | This script helps you backup your [RoamResearch](https://roamresearch.com/) graphs! 7 | 8 | This script automatically 9 | - Downloads a markdown archive of your RoamResearch workspace 10 | - Downloads a json archive of your RoamResearch workspace 11 | - Unzips them to your git directory 12 | - Commits and pushes the difference to Github 13 | 14 | # Demo 15 | [See it in action!](https://github.com/MatthieuBizien/roam-to-git-demo). This repo is updated using roam-to-git. 16 | 17 | # Why to use it 18 | 19 | - You have a backup if RoamResearch loses some of your data. 20 | - You have a history of your notes. 
21 | - You can browse your Github repository easily with a mobile device 22 | 23 | 24 | # Use it with Github Actions (recommended) 25 | 26 | ## Create a (private) Github repository for all your notes 27 | 28 | With [gh](https://github.com/cli/cli): `gh repo create notes` (yes, it's private) 29 | 30 | Or [manually](https://help.github.com/en/github/getting-started-with-github/create-a-repo) 31 | 32 | ## Configure Github secrets 33 | 34 | - Go to github.com/your/repository/settings/secrets 35 | 36 | ### 37 | 38 | Add 3 (separate) secrets whose names are 39 | 40 | `ROAMRESEARCH_USER` 41 | 42 | `ROAMRESEARCH_PASSWORD` 43 | 44 | `ROAMRESEARCH_DATABASE` 45 | 46 | - Refer to [env.template](env.template) for more information 47 | 48 | - When inserting the values, there is no need for quotation marks or assignments 49 | 50 | ![image](https://user-images.githubusercontent.com/173090/90904133-2cf1c900-e3cf-11ea-960d-71d0543b8158.png) 51 | 52 | 53 | ## Add GitHub action 54 | 55 | ``` 56 | cd notes 57 | mkdir -p .github/workflows/ 58 | curl https://raw.githubusercontent.com/MatthieuBizien/roam-to-git-demo/master/.github/workflows/main.yml > \ 59 | .github/workflows/main.yml 60 | git add .github/workflows/main.yml 61 | git commit -m "Add github/workflows/main.yml" 62 | git push --set-upstream origin master 63 | ``` 64 | 65 | ## Check that the Github Action works 66 | 67 | - Go to github.com/your/repository/actions 68 | - Your CI job should start in a few seconds 69 | 70 | ### Note: 71 | 72 | If the backup does not start automatically, try pushing to the repository again 73 | 74 | 75 | # Use it locally 76 | 77 | **Note**: if your file system is not case-sensitive, you will not back up notes that have the same name in different 78 | cases 79 | 80 | ## Install Roam-To-Git 81 | With [pipx](https://github.com/pipxproject/pipx) 82 | (if you don't know pipx, you should look at it, it's wonderful!) 83 | 84 | `pipx install git+https://github.com/MatthieuBizien/roam-to-git.git` 85 | 86 | ## Create a (private) Github repository for all your notes 87 | 88 | With [gh](https://github.com/cli/cli): `gh repo create notes` (yes, it's private) 89 | 90 | Or [manually](https://help.github.com/en/github/getting-started-with-github/create-a-repo) 91 | 92 | Then run `git push --set-upstream origin master` 93 | 94 | ## Configure environment variables 95 | 96 | - `curl https://raw.githubusercontent.com/MatthieuBizien/roam-to-git/master/env.template > notes/.env` 97 | - Fill the .env file: `vi notes/.env` 98 | - Ignore it: `echo .env > notes/.gitignore; cd notes; git add .gitignore; git commit -m "Initial commit"` 99 | 100 | ## Manual backup 101 | 102 | - Run the script: `roam-to-git notes/` 103 | - Check your Github repository, it should be filled with your notes :) 104 | 105 | ## Automatic backup 106 | 107 | One-liner to run it with a [cron](https://en.wikipedia.org/wiki/Cron) every hour: 108 | `echo "0 * * * * '$(which roam-to-git)' '$(pwd)/notes'" | crontab -` 109 | 110 | NB: there are [issues](https://github.com/MatthieuBizien/roam-to-git/issues/43) on Mac with a crontab. 111 | 112 | # Debug 113 | 114 | Making `roam-to-git` foolproof is hard, as it depends on Roam, on Github Actions or the local environment, 115 | on software that is not very stable (`pyppeteer`, we still love you 😉), 116 | and on the correct user configuration.
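A quick way to confirm that the credentials are actually visible to the script is to load the `.env` file the same way `roam-to-git` does. The snippet below is a minimal sketch, assuming your notes live in `notes/` and `python-dotenv` is installed; it only reports whether each required variable is set:

```
import os
from dotenv import load_dotenv

# Load the same file roam-to-git reads, then check each required variable.
load_dotenv("notes/.env")
for key in ("ROAMRESEARCH_USER", "ROAMRESEARCH_PASSWORD", "ROAMRESEARCH_DATABASE"):
    print(key, "is set" if os.environ.get(key) else "is MISSING")
```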
117 | 118 | For debugging, please try the following: 119 | 120 | - Check that the environment variables `ROAMRESEARCH_USER`, `ROAMRESEARCH_PASSWORD`, `ROAMRESEARCH_DATABASE` are correctly set up 121 | - Log in to Roam using the username and the password. 122 | You may want to request a new password if you have enabled Google Login, as this has solved problems for some users. 123 | - Run `roam-to-git --debug` to check that the authentication and download work 124 | - Look at the traceback 125 | - Look for similar issues 126 | - If nothing else works, create a new issue with as many details as possible. 127 | I will try my best to understand and help you, no SLA promised 😇 128 | 129 | # Task list 130 | 131 | ## Backup all RoamResearch data 132 | 133 | - [x] Download automatically from RoamResearch 134 | - [x] Create Cron 135 | - [x] Write detailed README 136 | - [x] Publish the repository on Github 137 | - [ ] Download images (they are currently visible in Github, but not in the archive, so they are not saved in the repository 😕) 138 | 139 | ## Format the backup to have a good UI 140 | 141 | ### Link formatting to be compatible with Github markdown 142 | - [x] Format `[[links]]` 143 | - [x] Format `#links` 144 | - [x] Format `attribute::` 145 | - [ ] Format `[[ [[link 1]] [[link 2]] ]]` 146 | - [ ] Format `((link))` 147 | 148 | ### Backlink formatting 149 | - [x] Add backlink references to the note files 150 | - [x] Integrate the context into the backlink 151 | - [x] Manage `/` in file names 152 | 153 | ### Other formatting 154 | - [x] Format `{{TODO}}` to be compatible with Github markdown 155 | - [ ] Format `{{query}}` 156 | 157 | ## Make it for others 158 | - [x] Push it to Github 159 | - [x] Add example repository 160 | - [x] Make the backup directory configurable 161 | - [ ] Publicize it 162 | - [x] [RoamResearch Slack](https://roamresearch.slack.com/) [thread](https://roamresearch.slack.com/archives/CN5MK4D2M/p1588670473431200) 163 | - [ ] [RoamResearch Reddit](https://www.reddit.com/r/RoamResearch/) 164 | - [ ] Twitter 165 | 166 | ## Some ideas I don't need, but PR welcome 😀 167 | - [ ] Test it/make it work on Windows 168 | - [x] Pre-configure a CI server so it can run every hour without a computer 169 | Thanks @Stvad for [#4](https://github.com/MatthieuBizien/roam-to-git/issues/4)!
170 | -------------------------------------------------------------------------------- /roam_to_git/scrapping.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import atexit 3 | import os 4 | import sys 5 | from pathlib import Path 6 | from typing import Optional 7 | 8 | import psutil 9 | import pyppeteer.connection 10 | from loguru import logger 11 | from pyppeteer.page import Page 12 | 13 | 14 | def patch_pyppeteer(): 15 | """Fix https://github.com/miyakogi/pyppeteer/issues/178""" 16 | import pyppeteer.connection 17 | original_method = pyppeteer.connection.websockets.client.connect 18 | 19 | def new_method(*args, **kwargs): 20 | kwargs['ping_interval'] = None 21 | kwargs['ping_timeout'] = None 22 | return original_method(*args, **kwargs) 23 | 24 | pyppeteer.connection.websockets.client.connect = new_method 25 | 26 | 27 | async def get_text(page, b, norm=True): 28 | """Get the inner text of an element""" 29 | text = await page.evaluate('(element) => element.textContent', b) 30 | if norm: 31 | text = text.lower().strip() 32 | return text 33 | 34 | 35 | class Config: 36 | def __init__(self, database: Optional[str], debug: bool, sleep_duration: float = 2.): 37 | self.user = os.environ["ROAMRESEARCH_USER"] 38 | self.password = os.environ["ROAMRESEARCH_PASSWORD"] 39 | assert self.user 40 | assert self.password 41 | if database: 42 | self.database: Optional[str] = database 43 | else: 44 | self.database = os.environ["ROAMRESEARCH_DATABASE"] 45 | assert self.database, "Please define the Roam database you want to backup." 46 | self.debug = debug 47 | self.sleep_duration = sleep_duration 48 | 49 | 50 | async def download_rr_archive(output_type: str, 51 | output_directory: Path, 52 | config: Config, 53 | slow_motion=10, 54 | ): 55 | logger.debug("Creating browser") 56 | browser = await pyppeteer.launch(devtools=config.debug, 57 | slowMo=slow_motion, 58 | autoClose=False, 59 | ) 60 | if config.debug: 61 | # We want the browser to stay open for debugging the interface 62 | pages = await browser.pages() 63 | document = pages[0] 64 | return await _download_rr_archive(document, output_type, output_directory, config) 65 | 66 | try: 67 | pages = await browser.pages() 68 | document = pages[0] 69 | return await _download_rr_archive(document, output_type, output_directory, config) 70 | except (KeyboardInterrupt, SystemExit): 71 | logger.debug("Closing browser on interrupt {}", output_type) 72 | await browser.close() 73 | logger.debug("Closed browser {}", output_type) 74 | raise 75 | finally: 76 | logger.debug("Closing browser {}", output_type) 77 | await browser.close() 78 | logger.debug("Closed browser {}", output_type) 79 | 80 | 81 | async def _download_rr_archive(document: Page, 82 | output_type: str, 83 | output_directory: Path, 84 | config: Config, 85 | ): 86 | """Download an archive in RoamResearch. 
87 | 88 | :param output_type: Download JSON or Markdown 89 | :param output_directory: Directory where the outputs are stored 90 | """ 91 | if not config.debug: 92 | logger.debug("Configure downloads to {}", output_directory) 93 | cdp = await document.target.createCDPSession() 94 | await cdp.send('Page.setDownloadBehavior', 95 | {'behavior': 'allow', 'downloadPath': str(output_directory)}) 96 | 97 | await signin(document, config, sleep_duration=config.sleep_duration) 98 | 99 | if config.database: 100 | await go_to_database(document, config.database) 101 | 102 | logger.debug("Wait for interface to load") 103 | dot_button = None 104 | for _ in range(100): 105 | # Starting is a little bit slow, so we wait for the button that signals it's ok 106 | await asyncio.sleep(config.sleep_duration) 107 | dot_button = await document.querySelector(".bp3-icon-more") 108 | if dot_button is not None: 109 | break 110 | 111 | # If we have multiple databases, we will be stuck. Let's detect that. 112 | await asyncio.sleep(config.sleep_duration) 113 | strong = await document.querySelector("strong") 114 | if strong: 115 | if "database's you are an admin of" == await get_text(document, strong): 116 | logger.error( 117 | "You seem to have multiple databases. Please select one with the option " 118 | "--database") 119 | sys.exit(1) 120 | 121 | assert dot_button is not None, "All roads lead to Roam, but that one is too long. Try " \ 122 | "again when Roam servers are faster." 123 | 124 | # Click on something empty to dismiss the potential popup 125 | # "Sync Quick Capture Notes with Workspace" 126 | await document.mouse.click(0, 0) 127 | 128 | await dot_button.click() 129 | 130 | logger.debug("Launch download popup") 131 | divs_pb3 = await document.querySelectorAll(".bp3-fill") 132 | export_all, = [b for b in divs_pb3 if await get_text(document, b) == 'export all'] 133 | await export_all.click() 134 | await asyncio.sleep(config.sleep_duration) 135 | 136 | async def get_dropdown_button(): 137 | dropdown_button = await document.querySelector(".bp3-dialog .bp3-button-text") 138 | assert dropdown_button is not None 139 | dropdown_button_text = await get_text(document, dropdown_button) 140 | # Defensive check in case the interface changes 141 | assert dropdown_button_text in ["markdown", "json"], dropdown_button_text 142 | return dropdown_button, dropdown_button_text 143 | 144 | logger.debug("Checking download type") 145 | button, button_text = await get_dropdown_button() 146 | 147 | if button_text != output_type: 148 | logger.debug("Changing output type to {}", output_type) 149 | await button.click() 150 | await asyncio.sleep(config.sleep_duration) 151 | output_type_elems = await document.querySelectorAll(".bp3-text-overflow-ellipsis") 152 | output_type_elem, = [e for e in output_type_elems if await get_text(document, e) == output_type] 153 | await output_type_elem.click() 154 | 155 | # Defensive check 156 | await asyncio.sleep(config.sleep_duration) 157 | _, button_text_ = await get_dropdown_button() 158 | assert button_text_ == output_type, (button_text_, output_type) 159 | 160 | logger.debug("Downloading output of type {}", output_type) 161 | buttons = await document.querySelectorAll('button') 162 | export_all_confirm, = [b for b in buttons if await get_text(document, b) == 'export all'] 163 | await export_all_confirm.click() 164 | 165 | logger.debug("Waiting for download of {} to {}", output_type, output_directory) 166 | if config.debug: 167 | # No way to check because the download location is not specified 168 | return 169 | for i in 
range(1, 60 * 10): 170 | await asyncio.sleep(1) 171 | if i % 60 == 0: 172 | logger.debug("Keep waiting for {}, {}s elapsed", output_type, i) 173 | for file in output_directory.iterdir(): 174 | if file.name.endswith(".zip"): 175 | logger.debug("File {} found for {}", file, output_type) 176 | await asyncio.sleep(1) 177 | return 178 | logger.debug("Waited too long for the download of {}", output_type) 179 | raise FileNotFoundError(f"Impossible to download {output_type} in {output_directory}") 180 | 181 | 182 | async def signin(document, config: Config, sleep_duration=1.): 183 | """Sign in to Roam""" 184 | logger.debug("Opening signin page") 185 | await document.goto('https://roamresearch.com/#/signin') 186 | await asyncio.sleep(sleep_duration) 187 | 188 | logger.debug("Fill email '{}'", config.user) 189 | email_elem = await document.querySelector("input[name='email']") 190 | await email_elem.click() 191 | await email_elem.type(config.user) 192 | 193 | logger.debug("Fill password") 194 | passwd_elem = await document.querySelector("input[name='password']") 195 | await passwd_elem.click() 196 | await passwd_elem.type(config.password) 197 | 198 | logger.debug("Click on sign-in") 199 | buttons = await document.querySelectorAll('button') 200 | signin_confirm, = [b for b in buttons if await get_text(document, b) == 'sign in'] 201 | await signin_confirm.click() 202 | await asyncio.sleep(sleep_duration) 203 | 204 | 205 | async def go_to_database(document, database): 206 | """Go to the database page""" 207 | url = f'https://roamresearch.com/#/app/{database}' 208 | logger.debug(f"Load database from url '{url}'") 209 | await document.goto(url) 210 | 211 | 212 | def _kill_child_process(timeout=50): 213 | procs = psutil.Process().children(recursive=True) 214 | if not procs: 215 | return 216 | logger.debug("Terminate child processes {}", procs) 217 | for p in procs: 218 | try: 219 | p.terminate() 220 | except psutil.NoSuchProcess: 221 | pass 222 | gone, still_alive = psutil.wait_procs(procs, timeout=timeout) 223 | if still_alive: 224 | logger.warning(f"Kill child processes {still_alive} that were still alive after " 225 | f"'timeout={timeout}' from the 'terminate()' command") 226 | for p in still_alive: 227 | try: 228 | p.kill() 229 | except psutil.NoSuchProcess: 230 | pass 231 | 232 | 233 | def scrap(markdown_zip_path: Path, json_zip_path: Path, config: Config): 234 | # Just to make running from the CLI easier 235 | markdown_zip_path = Path(markdown_zip_path) 236 | json_zip_path = Path(json_zip_path) 237 | 238 | tasks = [download_rr_archive("markdown", Path(markdown_zip_path), config=config), 239 | download_rr_archive("json", Path(json_zip_path), config=config), 240 | ] 241 | # Register a handler to always kill child processes when the script closes, so we don't 242 | # leave zombie processes. Because of https://github.com/miyakogi/pyppeteer/issues/274, 243 | # without this patch it happens a lot. 244 | if not config.debug: 245 | atexit.register(_kill_child_process) 246 | if config.debug: 247 | for task in tasks: 248 | # Run sequentially for easier debugging 249 | asyncio.get_event_loop().run_until_complete(task) 250 | logger.warning("Exiting without updating the git repository, " 251 | "because we can't get the downloads with the option --debug") 252 | else: 253 | asyncio.get_event_loop().run_until_complete(asyncio.gather(*tasks)) 254 | logger.debug("Scraping finished") 255 | --------------------------------------------------------------------------------