).*?<\/div>", match):
69 | clean_div = div.replace("
", "").replace("
", "")
70 | match = match.replace(div, clean_div)
71 | self._html = self._html.replace("[evernote-dump-table-cleaner]", match)
72 |
73 | # Insert a title to be parsed in markdown
74 | self._html = ("
" + self._title + "
" + self._html).encode('utf-8')
75 |
76 | def convert_evernote_markings(self):
# Rewrite Evernote-specific markup held in self._html: attachment links are
# handled first by convert_evernote_markings_attachments(), then every
# (take, give) pair in `replacements` is applied, in order, with str.replace.
# NOTE(review): several search strings below are empty or split across lines
# in this view - presumably markup was lost when the file was extracted;
# confirm the literals against the repository before relying on them.
77 | self.convert_evernote_markings_attachments()
78 |
79 | replacements = (
80 | # Handle Checkboxes
81 | # without this workaround html2text will convert '-' to '\\-' because there is space after dash
82 | ('
', '-
[ ] '),
83 | ('', '- [ ] '),
84 | ('', '- [x] '),
85 | ('', '- [x] '),
86 | ('', ''),
87 | )
88 |
89 | for take, give in replacements:
90 | self._html = self._html.replace(take, give)
91 |
def convert_code_blocks(self):
    """
    Surround Evernote code-block divs with Markdown code fences.

    Parses self._html, finds every <div> whose style attribute matches
    "-en-codeblock:true", inserts a ``` marker before and after it, and
    stores the re-serialized document back in self._html.
    """
    soup = BeautifulSoup(self._html, "html.parser")
    code_block_style = re.compile(r"-en-codeblock:true")
    # find_all is the supported spelling; findAll is a deprecated alias.
    for block in soup.find_all("div", style=code_block_style):
        block.insert_before('```')
        block.insert_after('```')
    self._html = str(soup)
99 |
100 | def convert_evernote_markings_attachments(self):
# Replace each attachment link found in self._html with a
# "\n[noteattachmentN][hash]" placeholder; the placeholder is prefixed with
# "!" when the matched link text contains "image".
# NOTE(review): the findall pattern below is empty in this view - presumably
# the tag it matched was stripped during extraction; confirm before editing.
101 | # Find all attachment links in notes
102 | matches = re.findall(r'', self._html)
103 |
104 | # Replace all attachments links with a hash placeholder
105 | for i in range(len(matches)):
106 | _hash = re.findall(r'[a-zA-Z0-9]{32}', matches[i])
107 | if_image = "!" if "image" in matches[i] else ""
# _hash[0] raises IndexError when the match holds no 32-character alphanumeric run.
108 | placeholder = "\n%s[noteattachment%d][%s]" % (if_image, i + 1, _hash[0])
109 | self._html = self._html.replace(matches[i], placeholder)
110 |
def convert_html_to_markdown(self):
    """Decode the UTF-8 bytes in self._html and store their html2text rendering in self._markdown."""
    decoded_html = self._html.decode('utf-8')
    self._markdown = self.html2text.handle(decoded_html)
113 |
def create_file(self):
    """
    Write self._markdown to <self._path>/<self._filename> as UTF-8 text.

    Afterwards the file's access time is set to the note's created date and
    its modification time to the note's updated date.
    """
    target = os.path.join(self._path, self._filename)
    with open(target, 'w', encoding='UTF-8', errors='replace') as outfile:
        outfile.write(self._markdown)
    stamps = (self._created_date.timestamp(), self._updated_date.timestamp())
    os.utime(target, stamps)
119 |
def create_filename(self):
    """
    Derive self._filename (a ".md" name) from the note title.

    A title without a single letter or digit is replaced by "_" followed by
    the note uuid. The title is cut to 128 characters, cleaned with
    path_safe_string, and de-duplicated against the files already present in
    self._path (the directory is created when missing).
    """
    title_is_unusable = all(not (ch.isalpha() or ch.isdigit()) for ch in self._title)
    if title_is_unusable:
        self._title = "_" + str(self._uuid)

    directory = make_dir_check(self._path)
    candidate = path_safe_string(self._title[:128]) + ".md"
    self._filename = check_for_double(directory, candidate)
126 |
127 | def create_placeholders(self):
# Swap whitespace sequences in self._html for the literal marker tokens
# "[endumptab]" and "[endumpspace]".
# NOTE(review): both search strings render as a single space in this view; if
# they really were identical the second replace could never match, so they
# presumably differ in the repository - confirm before editing.
128 | # Create place holder to preserve spaces and tabs
129 | self._html = self._html.replace(" ", "[endumptab]")
130 | self._html = self._html.replace(" ", "[endumpspace]")
131 |
def restore_placeholders(self):
    """Turn the "[endumptab]" and "[endumpspace]" markers in self._markdown back into whitespace."""
    restored = self._markdown
    for marker, whitespace in (("[endumptab]", "\t"), ("[endumpspace]", " ")):
        restored = restored.replace(marker, whitespace)
    self._markdown = restored
135 |
def clean_markdown(self):
    """Strip trailing whitespace from every line of self._markdown and rejoin the lines with "\\n"."""
    trimmed_lines = (line.rstrip() for line in self._markdown.splitlines())
    self._markdown = '\n'.join(trimmed_lines)
138 |
def create_markdown(self):
    """
    Run the complete HTML-to-Markdown pipeline for this note and write the file.

    The steps run in a fixed order; the tag section is only generated when
    the note has at least one tag.
    """
    # Conversion of the note body.
    self.create_placeholders()
    self.clean_html()
    self.convert_html_to_markdown()
    self.restore_placeholders()

    # Trailing sections.
    self.create_markdown_attachments()
    if self._tags:
        self.create_markdown_note_tags()
    self.create_markdown_note_attr()

    # Final tidy-up and output.
    self.clean_markdown()
    self.create_file()
150 |
def create_markdown_attachments(self):
    """
    Append an "ATTACHMENTS" section to self._markdown.

    Nothing is appended when the note has no attachments. Otherwise each
    attachment contributes a "[hash]: <MEDIA_PATH><filename>" reference line
    followed by whatever its get_attributes() returns.
    """
    if not self._attachments:
        return

    pieces = ["\n---", "\n### ATTACHMENTS"]
    for attachment in self._attachments:
        pieces.append("\n[%s]: %s%s" % (
            attachment.get_hash(), self.MEDIA_PATH, attachment.get_filename()))
        pieces.append(attachment.get_attributes())
    self._markdown += "".join(pieces)
160 |
def create_markdown_note_attr(self):
    """
    Append a "NOTE ATTRIBUTES" section to self._markdown.

    The section lists the created and updated dates (formatted with
    self.TIME_FORMAT) followed by one quoted line per entry of
    self._attributes, using the first two items of each entry.
    """
    created = self._created_date.strftime(self.TIME_FORMAT)
    updated = self._updated_date.strftime(self.TIME_FORMAT)
    section = [
        "\n---",
        "\n### NOTE ATTRIBUTES",
        "\n>Created Date: " + created + " ",
        "\n>Last Evernote Update Date: " + updated + " ",
    ]
    for attr in self._attributes:
        section.append("\n>%s: %s " % (attr[0], attr[1]))
    self._markdown += "".join(section)
169 |
def create_markdown_note_tags(self):
    """Append a "TAGS" section to self._markdown with every tag rendered as "{tag}"."""
    braced_tags = ['{%s}' % tag for tag in self._tags]
    tag_line = ' '.join(braced_tags) + "\n"
    self._markdown += "\n\n---" + "\n### TAGS\n" + tag_line
176 |
def finalize(self):
    """Finish the note by running create_markdown()."""
    self.create_markdown()
179 |
def get_created_date(self):
    """Return the value stored in self._created_date."""
    return self._created_date
182 |
def get_filename(self):
    """Return the value stored in self._filename."""
    return self._filename
185 |
def get_title(self):
    """Return the value stored in self._title."""
    return self._title
188 |
def get_uuid(self):
    """Return the value stored in self._uuid."""
    return self._uuid
191 |
def new_attachment(self):
    """Append a freshly constructed Attachment to self._attachments."""
    fresh_attachment = Attachment()
    self._attachments.append(fresh_attachment)
194 |
def set_created_date(self, date_string):
    """
    Store the note's created date.

    :param date_string: text parsed with self.ISO_DATE_FORMAT; when it cannot
                        be parsed (TypeError or ValueError) the current time
                        is stored instead.
    """
    try:
        parsed = datetime.strptime(date_string, self.ISO_DATE_FORMAT)
    except (TypeError, ValueError):
        parsed = datetime.now()
    self._created_date = parsed
200 |
def set_updated_date(self, date_string):
    """
    Store the note's updated date.

    :param date_string: text parsed with self.ISO_DATE_FORMAT; when it cannot
                        be parsed (TypeError or ValueError) the current time
                        is stored instead.
    """
    try:
        self._updated_date = datetime.strptime(date_string, self.ISO_DATE_FORMAT)
    except (TypeError, ValueError):
        # The fallback belongs to the *updated* date; writing it to
        # _created_date would clobber the created date and leave the
        # updated date unset.
        self._updated_date = datetime.now()
206 |
def set_path(self, path):
    """Store *path* in self._path."""
    self._path = path
209 |
def set_title(self, title):
    """Store *title* in self._title, then rebuild the filename with create_filename()."""
    self._title = title
    self.create_filename()
213 |
--------------------------------------------------------------------------------
/evernote_dump/note_parser/note_parser.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | import os
4 | from xml.sax import ContentHandler
5 |
6 | from ..utilities.settings import Settings
7 | from .note import Note, Attachment
8 |
9 |
class NoteParser(ContentHandler):
    """
    SAX content handler that turns the elements of an .enex export into notes.

    The export is consumed as a stream of start / characters / end events, so
    large files can be parsed without being loaded into memory.

    :param current_file: joined onto the export path to form the path handed
                         to every note and attachment.
    :param settings: Settings is a custom class to pass application wide settings.
    :param print_func: func Optional callback that receives each status
                       message; when omitted the message is printed to console.
    """

    def __init__(self, current_file, settings: Settings, print_func=None):
        super().__init__()
        self.current_file = current_file
        self.settings = settings
        self.print_func = print_func
        self.path = settings.export_path

        # Streaming state, updated as SAX events arrive.
        self.CurrentData = ""
        self.note = None
        self.attachment = None
        self.in_note_attributes = False
        self.in_resource_attributes = False

    def print_message(self, message: str):
        """Send *message* to the print callback, or to print() when none was given."""
        emit = self.print_func if self.print_func else print
        emit(message)

    def _target_dir(self):
        """Return the export path joined with the current file."""
        return os.path.join(self.path, self.current_file)

    def startElement(self, tag, attributes):
        """ Called when a new element is found """
        self.CurrentData = tag

        if tag == "note-attributes":
            self.in_note_attributes = True
            return
        if tag == "resource-attributes":
            self.in_resource_attributes = True
            return
        if tag == "en-export":  # First tag found in .enex file
            self.print_message("\n####EXPORT STARTED####")
            return
        if tag == "note":  # New note found
            self.note = Note()
            self.note.set_path(self._target_dir())
            return
        if tag == "resource":  # Found an attachment
            self.attachment = Attachment()
            self.attachment.set_path(self._target_dir())
            # Seed the attachment with details of the note it belongs to.
            self.attachment.set_created_date(self.note.get_created_date())
            self.attachment.set_filename(self.note.get_title())
            self.attachment.set_uuid(self.note.get_uuid())

    def endElement(self, tag):
        """Called at the end of an element"""
        if tag == "content":
            return
        if tag == "note-attributes":
            self.in_note_attributes = False
            return
        if tag == "title":
            self.print_message(f"\nProcessing Note: {self.note.get_title()}")
            return
        if tag == "data":
            self.note.add_attachment(self.attachment)
            return
        if tag == "resource":
            self.print_message(f"---Exporting Attachment: {self.attachment.get_filename()}")
            self.attachment.finalize(self.settings)
            self.in_resource_attributes = False
            return
        if tag == "note":  # Last tag called before starting a new note
            self.print_message(f"---Exporting Note: {self.note.get_filename()}")
            self.note.finalize()
            return
        if tag == "en-export":  # Last tag closed in the whole .enex file
            self.print_message("\n####EXPORT FINISHED####\n")

    def characters(self, content_stream):
        """Content Stream"""
        current = self.CurrentData

        if current == "title":
            self.note.set_title(content_stream)
        elif current == "content":
            self.note.append_html(content_stream)
        elif current == "created":
            self.note.set_created_date(content_stream)
        elif current == "updated":
            self.note.set_updated_date(content_stream)
        elif current == "tag":
            self.note.append_tag(content_stream)
        elif current == "data":
            self.attachment.data_stream_in(content_stream)
        elif current == "mime":
            self.attachment.set_mime(content_stream)
        elif current == "file-name" and not self.settings.use_note_title_for_attachments:
            # The name stored in the export is only used when the note title
            # is not requested for attachment names.
            self.attachment.set_filename(content_stream)

        # While inside an attributes element the text is also recorded as an
        # attribute under the current tag name.
        if self.in_note_attributes:
            self.note.add_found_attribute(current, content_stream)
        if self.in_resource_attributes:
            self.attachment.add_found_attribute(current, content_stream)
104 |
--------------------------------------------------------------------------------
/evernote_dump/utilities/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/exomut/evernote-dump/fa3dae0c8918a1a111c9ad7f6f59a1b6b16752dc/evernote_dump/utilities/__init__.py
--------------------------------------------------------------------------------
/evernote_dump/utilities/settings.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 |
class Settings:
    """
    Application wide settings for an export run.

    The short properties (``path``, ``p``, ``n`` and ``enex``) read and write
    the same values as the descriptive attributes ``export_path``,
    ``preserve_file_names``, ``use_note_title_for_attachments`` and ``files``.
    """

    def __init__(self):
        self.files = []
        self.export_path = ""
        self.preserve_file_names = False
        self.use_note_title_for_attachments = False

    @property
    def path(self):
        """Alias of ``export_path``."""
        return self.export_path

    @path.setter
    def path(self, path: str):
        self.export_path = path

    @property
    def p(self):
        """Alias of ``preserve_file_names``."""
        return self.preserve_file_names

    @p.setter
    def p(self, p: bool):
        self.preserve_file_names = p

    @property
    def n(self):
        """Alias of ``use_note_title_for_attachments``."""
        return self.use_note_title_for_attachments

    @n.setter
    def n(self, n: bool):
        self.use_note_title_for_attachments = n

    @property
    def enex(self):
        """Alias of ``files``; assigning appends the .enex entries of the given list."""
        return self.files

    @enex.setter
    def enex(self, files: list):
        # Assignment extends self.files rather than replacing it. Only names
        # that actually END in ".enex" are kept: a substring test would also
        # accept e.g. "notes.enex.bak" or anything inside a "foo.enex/" folder.
        self.files.extend(
            file for file in files if file.lower().endswith('.enex'))
46 |
47 |
--------------------------------------------------------------------------------
/evernote_dump/utilities/tool_kit.py:
--------------------------------------------------------------------------------
1 | # /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | import os
4 |
5 |
def check_for_double(path: str, filename: str) -> str:
    """
    Searches a directory for files with the desired filename.
    If a match is found a new filename will be created with a numbers system.
    Useful to prevent over-writing files.

    The number is inserted before the last extension ("note.md" becomes
    "note-2.md", "note-3.md", ...) or appended when there is no extension
    ("README" becomes "README-2", "README-3", ...).

    :param path: str Path to directory where the new file will be created.
    :param filename: str Filename to check for duplicates.

    :return: Returns an updated filename with an ascending number if duplicates found.
    :rtype: str
    """
    stem, dot, extension = filename.rpartition('.')
    if not dot:
        # No extension at all: the whole name is the stem.
        stem, extension = filename, ''

    candidate = filename
    double_counter = 2
    while os.path.exists(os.path.join(path, candidate)):
        # Always rebuild from the original name so suffixes never pile up
        # (previously extension-less names produced "README-2-3").
        candidate = stem + '-' + str(double_counter) + dot + extension
        double_counter += 1
    return candidate
29 |
30 |
def make_dir_check(path: str) -> str:
    """
    Make sure the directory exists, creating it (and any missing parents)
    when it is not found, and return the path.

    :param path: str location of new directory

    :return: path
    :rtype: str
    """
    try:
        # Create first and tolerate "already there": a separate exists()
        # check can be overtaken by another process creating the directory.
        os.makedirs(path)
    except FileExistsError:
        pass

    return path
45 |
46 |
def multi_choice(choices: tuple) -> int:
    """
    Prints the choices to console including a number for choosing.

    The prompt is repeated until a whole number inside the accepted range is
    entered; text that is not a whole number no longer raises ValueError.

    :param choices: tuple Strings for user to make a choice

    :return: Returns the index value for the choice
    :rtype: int
    """
    phrase = ''.join(
        choice + '[' + str(number) + '] '
        for number, choice in enumerate(choices, start=1))

    while True:
        try:
            result = int(input(phrase))
        except ValueError:
            # Not a whole number: ask again instead of crashing.
            continue

        # NOTE(review): the prompt numbers the choices from 1, but the
        # accepted range is 0 .. len(choices) - 1 and the entered number is
        # returned unchanged, so the last choice cannot be selected. Left
        # as-is because callers are not visible here - confirm which
        # numbering they expect before changing it.
        if 0 <= result < len(choices):
            return result
65 |
66 |
def path_safe_string(text: str) -> str:
    """
    Cleans the provided string for export to file system.

    :param text: string Text to be cleaned.

    :return: Cleaned string
    :rtype: str
    """
    # Pass 1: drop every character listed here.
    unwanted = str.maketrans('', '', r'[]/\;,><&*:%=+@!#^()|?^')
    cleaned = text.translate(unwanted)

    # Pass 2: substitute what is left. Entries whose search character was
    # already dropped in pass 1 can no longer match.
    substitutions = (("/", "/"), ("*", "*"), (":", ":"), ("¥", "¥"),
                     ("?", "?"), ('"', "“"), ("<", "<"), (">", ">"), ("|", "-"))
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)

    return cleaned
86 |
--------------------------------------------------------------------------------
/images/export_menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/exomut/evernote-dump/fa3dae0c8918a1a111c9ad7f6f59a1b6b16752dc/images/export_menu.png
--------------------------------------------------------------------------------
/images/menu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/exomut/evernote-dump/fa3dae0c8918a1a111c9ad7f6f59a1b6b16752dc/images/menu.png
--------------------------------------------------------------------------------
/images/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/exomut/evernote-dump/fa3dae0c8918a1a111c9ad7f6f59a1b6b16752dc/images/screenshot.png
--------------------------------------------------------------------------------
/images/select_all.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/exomut/evernote-dump/fa3dae0c8918a1a111c9ad7f6f59a1b6b16752dc/images/select_all.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | html2text
2 | bs4
3 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/exomut/evernote-dump/fa3dae0c8918a1a111c9ad7f6f59a1b6b16752dc/test/__init__.py
--------------------------------------------------------------------------------
/test/test_evernote_dump.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | import unittest
4 | import os
5 | import shutil
6 |
7 | from evernote_dump import dump
8 | from evernote_dump.utilities.settings import Settings
9 |
10 |
class TestEvernoteDump(unittest.TestCase):
    """Runs dump.run_parse over the sample .enex files in data/ and checks the output folders."""

    def setUp(self) -> None:
        self.s = Settings()

    def _remove_after_test(self, *directories):
        """Register output directories for removal even when an assertion fails."""
        for directory in directories:
            # ignore_errors: the directory may never have been created.
            self.addCleanup(shutil.rmtree, directory, ignore_errors=True)

    # TODO: Add test for preserved file names

    def test_run_parse_single_file_with_out_overwrite(self):
        self._remove_after_test('Archives')
        self.s.files = ['data/Archives.enex']
        dump.run_parse(self.s)
        self.assertTrue(os.path.isdir('Archives'))

    def test_run_parse_auto_rename_do_not_overwrite(self):
        self._remove_after_test('Check It Out')
        self.s.files = ['data/Check It Out.enex', 'data/Check It Out.enex']
        dump.run_parse(self.s)
        dump.run_parse(self.s)
        self.assertTrue(os.path.isdir('Check It Out'))
        # Exporting the same note again must produce a numbered copy.
        self.assertTrue(os.path.isfile('Check It Out/Great Chili-2.md'))

    def test_run_parse_multiple_files(self):
        self._remove_after_test('Archives', 'Recipes')
        self.s.files = ['data/Archives.enex', 'data/Recipes.enex']
        dump.run_parse(self.s)
        self.assertTrue(os.path.isdir('Archives'))
        self.assertTrue(os.path.isdir('Recipes'))

    def test_run_parse_with_spaces_in_file_name(self):
        self._remove_after_test('Check It Out')
        self.s.files = ['data/Check It Out.enex', ]
        dump.run_parse(self.s)
        self.assertTrue(os.path.isdir('Check It Out'))
45 |
--------------------------------------------------------------------------------