├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
├── setup.py
├── tests
    ├── __init__.py
    ├── conftest.py
    ├── test_adapter.py
    ├── test_joplin.py
    └── test_wiz.py
├── w2j
    ├── __init__.py
    ├── __main__.py
    ├── adapter.py
    ├── joplin.py
    ├── parser.py
    └── wiz.py
└── wiznoteformac.png


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | output/
131 | .vscode/
132 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <https://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Migrate from WizNote to Joplin.
  2 | 
  3 | ## !!!CAUTION!!!!
  4 | 
  5 | wiz2joplin has only been tested in wizNote for Mac 2.8.7. AFAIK, the folder structure of the macOS and Windows versions of wizNote may be different.
  6 | 
  7 | If you can provide a pull request that adds support for WizNote on Windows, I believe it will be helpful to many people.
  8 | 
  9 | ## Dependency
 10 | 
 11 | - Python 3.9
 12 | - macOS Catalina or above
 13 | - wizNote for Mac 2.8.7 (2020.8.20 10:28)
 14 | - ![wiznote for macOS](wiznoteformac.png)
 15 | 
 16 | ## Installation
 17 | 
 18 | To install this tool, you can use pip:
 19 | 
 20 | ```
 21 | python -m venv ~/w2j/venv
 22 | source ~/w2j/venv/bin/activate
 23 | pip install w2j
 24 | ```
 25 | 
 26 | Alternatively, you can install the package using the bundled setup script:
 27 | 
 28 | ```
 29 | python -m venv ~/w2j/venv
 30 | source ~/w2j/venv/bin/activate
 31 | python setup.py install
 32 | ```
 33 | 
 34 | ## Usage
 35 | 
 36 | If your WizNote user id is `youremail@yourdomain.com`, the token in Joplin Web Clipper is `aa630825022a340ecbe5d3e2f25e5f6a`, and Joplin runs on the same computer, you can use wiz2joplin as follows.
 37 | 
 38 | Convert all documents from WizNote to Joplin:
 39 | 
 40 | ``` shell
 41 | w2j -o ~/w2j -w ~/.wiznote -u youremail@yourdomain.com -t aa630825022a340ecbe5d3e2f25e5f6a -a
 42 | ```
 43 | 
 44 | Convert the location `/My Notes/reading/` and all of its child locations from WizNote to Joplin:
 45 | 
 46 | ``` shell
 47 | w2j -o ~/w2j -w ~/.wiznote -u youremail@yourdomain.com -t aa630825022a340ecbe5d3e2f25e5f6a -l '/My Notes/reading/' -r
 48 | 
 49 | ```
 50 | 
 51 | Use `w2j --help` to show usage for w2j:
 52 | 
 53 | ```
 54 | usage: w2j [-h] --output OUTPUT --wiz-dir WIZNOTE_DIR --wiz-user
 55 |            WIZNOTE_USER_ID --joplin-token JOPLIN_TOKEN
 56 |            [--joplin-host JOPLIN_HOST] [--joplin-port JOPLIN_PORT]
 57 |            [--location LOCATION] [--location-children] [--all]
 58 | 
 59 | Migrate from WizNote to Joplin.
 60 | 
 61 | optional arguments:
 62 |   -h, --help            show this help message and exit
 63 |   --output OUTPUT, -o OUTPUT
 64 |                         The output dir for unziped WizNote file and log file.
 65 |                         e.g. ~/wiz2joplin_output or
 66 |                         C:\Users\zrong\wiz2joplin_output
 67 |   --wiz-dir WIZNOTE_DIR, -w WIZNOTE_DIR
 68 |                         Set the data dir of WizNote. e.g ~/.wiznote or
 69 |                         C:\Program Files\WizNote
 70 |   --wiz-user WIZNOTE_USER_ID, -u WIZNOTE_USER_ID
 71 |                         Set your user id(login email) of WizNote.
 72 |   --joplin-token JOPLIN_TOKEN, -t JOPLIN_TOKEN
 73 |                         Set the authorization token to access Joplin Web
 74 |                         Clipper Service.
 75 |   --joplin-host JOPLIN_HOST, -n JOPLIN_HOST
 76 |                         Set the host of your Joplin Web Clipper Service,
 77 |                         default is 127.0.0.1
 78 |   --joplin-port JOPLIN_PORT, -p JOPLIN_PORT
 79 |                         Set the port of your Joplin Web Clipper Service,
 80 |                         default is 41184
 81 |   --location LOCATION, -l LOCATION
 82 |                         Convert the location of WizNote, e.g. /My Notes/. If
 83 |                         you use the --all parameter, then skip --location
 84 |                         parameter.
 85 |   --location-children, -r
 86 |                         Use with --location parameter, convert all children
 87 |                         location of --location.
 88 |   --all, -a             Convert all documents of your WizNote.
 89 | ```
 90 | 
 91 | ## Log file
 92 | 
 93 | Please read the log file `w2j.log` under the `--output` directory to check the conversion status.
 94 | 
 95 | ## 源码分析相关文章
 96 | 
 97 | - [从 WizNote 为知笔记到 Joplin（上）](https://blog.zengrong.net/post/wiznote2joplin1/)
 98 | - [从 WizNote 为知笔记到 Joplin（下）](https://blog.zengrong.net/post/wiznote2joplin2/)
 99 | - [WizNote 为知笔记 macOS 本地文件夹分析](https://blog.zengrong.net/post/analysis-of-wiznote/)
100 | - [使用腾讯云对象存储(COS)实现Joplin同步](https://blog.zengrong.net/post/joplin-sync-use-cos/)
101 | - [配置 Joplin Server 实现同步](https://blog.zengrong.net/post/joplin-server-config/)
102 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | idna==2.10
2 | chardet
3 | httpx
4 | inscriptis


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import re
 3 | from setuptools import setup, find_packages
 4 | 
 5 | here = Path(__file__).parent
 6 | 
 7 | def read(*parts):
 8 |     """ 读取一个文件并返回内容
 9 |     """
10 |     return here.joinpath(*parts).read_text(encoding='utf8')
11 | 
def find_version(*file_paths):
    """Extract the version string assigned to ``__version__`` in the given file.

    The path parts are forwarded to ``read``. Raises RuntimeError when no
    ``__version__ = '...'`` assignment is found at the start of a line.
    """
    pattern = r"^__version__ = ['\"]([^'\"]*)['\"]"
    found = re.search(pattern, read(*file_paths), re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
21 | 
def find_requires(*file_paths):
    """Read a requirements file and return its content as a list of lines."""
    return read(*file_paths).splitlines()
27 | 
def static_requires():
    """Return the fixed list of install requirements for the package."""
    requirements = [
        'idna==2.10',
        'chardet',
        'httpx',
        'inscriptis',
    ]
    return requirements
30 | 
# Trove classifiers published with the package metadata.
classifiers = [
    'Programming Language :: Python :: 3.9',
    'Development Status :: 4 - Beta',
    'Environment :: Console',
    'Topic :: System :: Shells',
    'Topic :: Utilities',
    'Topic :: Text Processing :: Markup :: HTML',
    'Topic :: Text Processing :: Markup :: Markdown',
    'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
]

# Console entry point: the `w2j` command invokes w2j.main().
entry_points = {
    'console_scripts': [
        'w2j=w2j:main'
    ]
}

# Extra non-Python files to ship with the package.
package_data = {
    '': ['requirements.txt']
}


# The version is read from w2j/__init__.py and the long description from
# README.md; install requirements come from static_requires().
setup(
    python_requires='>=3.9, <4',
    name = "w2j",
    version=find_version('w2j', '__init__.py'),
    author = "zrong",
    author_email = "zrongzrong@gmail.com",
    url = "https://github.com/zrong/wiz2joplin",
    description = "A tool for migrating from WizNote to Joplin.",
    long_description=read('README.md'),
    long_description_content_type='text/markdown',
    license = "GPLv3",
    keywords = "development zrong wiznote joplin",
    packages = find_packages(exclude=['test*', 'output', 'venv']),
    install_requires=static_requires(),
    entry_points=entry_points,
    include_package_data = True,
    zip_safe=False,
    classifiers = classifiers, 
    package_data=package_data
)
74 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zrong/wiz2joplin/0ce8bf9a867171176c28f199addbef95fe8c6b96/tests/__init__.py


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from os import environ
 3 | 
 4 | import pytest
 5 | 
 6 | from w2j import wiz, joplin
 7 | from w2j.adapter import Adapter
 8 | 
 9 | 
@pytest.fixture(scope='session')
def work_dir():
    """Return the ``output/`` directory next to the tests package, creating it if missing."""
    output_dir = Path(__file__).parent.parent.joinpath('output/')
    if output_dir.exists():
        return output_dir
    output_dir.mkdir()
    return output_dir
16 | 
17 | 
@pytest.fixture(scope='session')
def ws(wiz_user_id: str, work_dir: Path):
    """Provide a WizNote storage object (non-group) backed by ``~/.wiznote``."""
    wiz_home = Path('~/.wiznote').expanduser()
    return wiz.WizStorage(wiz_user_id, wiz_home, is_group_storage=False, work_dir=work_dir)
25 | 
26 | 
@pytest.fixture(scope='session')
def wiz_user_id():
    """Return the WizNote user id from the ``W2J_USER_ID`` environment variable (None if unset)."""
    user_id = environ.get('W2J_USER_ID')
    return user_id
30 | 
31 | 
@pytest.fixture(scope='session')
def jda():
    """Provide a Joplin Data API client for the tests.

    The token can be supplied through the ``W2J_JOPLIN_TOKEN`` environment
    variable, mirroring how ``W2J_USER_ID`` configures the WizNote user; the
    previously hard-coded token remains the fallback so existing setups keep
    working.
    """
    default_token = 'd3098caff3d80561bf915c15cf3f70956c3550fc13e67bee78f74b1a6b2d2632dff10667668cc6682df27d493aa35492b68fa3f642f738fd80547acf571dc17c'
    return joplin.JoplinDataAPI(
        token=environ.get('W2J_JOPLIN_TOKEN', default_token)
    )
37 | 
@pytest.fixture(scope='session')
def js():
    """Provide a Joplin storage object rooted at ``~/.config/joplin-desktop``."""
    profile_dir = Path('~/.config/joplin-desktop').expanduser()
    storage = joplin.JoplinStorage(profile_dir)
    return storage
42 | 
43 | 
@pytest.fixture(scope='session')
def adapter(ws: wiz.WizStorage, jda: joplin.JoplinDataAPI, work_dir: Path):
    """Provide an Adapter wired to the personal WizNote storage and the Joplin API client."""
    personal_adapter = Adapter(ws, jda, work_dir)
    return personal_adapter
47 | 
48 | 
@pytest.fixture(scope='session')
def wsg(wiz_user_id: str, work_dir: Path):
    """Provide a WizNote *group* storage object backed by ``~/.wiznote``."""
    wiz_home = Path('~/.wiznote').expanduser()
    return wiz.WizStorage(wiz_user_id, wiz_home, is_group_storage=True, work_dir=work_dir)
56 | 
@pytest.fixture(scope='session')
def adapter_group(wsg: wiz.WizStorage, jda: joplin.JoplinDataAPI, work_dir: Path):
    """Provide an Adapter wired to the WizNote group storage and the Joplin API client."""
    group_adapter = Adapter(wsg, jda, work_dir)
    return group_adapter


--------------------------------------------------------------------------------
/tests/test_adapter.py:
--------------------------------------------------------------------------------
 1 | from w2j.wiz import WizStorage
 2 | from w2j.joplin import JoplinStorage
 3 | import pytest
 4 | 
 5 | from w2j.adapter import Adapter
 6 | from w2j.parser import tojoplinid
 7 | 
 8 | 
 9 | @pytest.mark.skip
10 | def test_sync_folders(adapter: Adapter):
11 |     adapter.sync_folders()
12 | 
@pytest.mark.skip
def test_sync_tags(adapter: Adapter):
    """Run the tag synchronisation once (skipped by default)."""
    adapter.sync_tags()
16 | 
17 | 
@pytest.mark.skip
def test_convert_body(ws: WizStorage, adapter: Adapter):
    """Convert a single WizNote document into a Joplin note.

    Folders and tags are synchronised first, then one document is built and
    synced, and the resulting note id is compared with the converted guid.
    """
    # Other sample documents used during development:
    #   2021-01-4weeks.md -> '7fdcba42-4d6e-4c1d-a7d8-ffb97a5cca2f'
    #   'b500bc97-e6d8-4038-be8d-cd1f182e880c'
    # NOTE(review): the guid that is built differs from the guid that is
    # asserted below -- confirm this is intentional.
    guid = '7594790e-b9ab-463a-bc38-7cc546d76513'

    adapter.sync_folders()
    adapter.sync_tags()

    document = ws.build_document('01607fd4-2c63-b6a2-0b87-294529a3f645')
    note = adapter._sync_note(document)
    assert tojoplinid(guid) == note.id
33 | 
34 | 
def test_convert_location(adapter: Adapter):
    """Convert one WizNote location to Joplin, including its child locations."""
    adapter.sync_note_by_location('/微信收藏/', True)
40 | 
41 | 
@pytest.mark.skip
def test_convert_all_group(adapter_group: Adapter):
    """Sync everything from a WizNote group storage (skipped by default).

    The unused ``location`` local was removed: ``sync_all`` takes no location.
    """
    adapter_group.sync_all()
48 | 
49 | 
@pytest.mark.skip
def test_update_joplin_time(adapter_group: Adapter, js: JoplinStorage):
    """Push WizNote created/modified timestamps into the Joplin storage.

    A document without a modified time falls back to its created time.
    """
    times = []
    for wd in adapter_group.ws.documents:
        times.append({
            'id': tojoplinid(wd.guid),
            'created_time': wd.created,
            'updated_time': wd.modified or wd.created,
        })
    js.update_time(times)


--------------------------------------------------------------------------------
/tests/test_joplin.py:
--------------------------------------------------------------------------------
 1 | from os import name
 2 | from pathlib import Path
 3 | from w2j.joplin import JoplinDataAPI
 4 | import pytest
 5 | 
 6 | def test_ping(jda: JoplinDataAPI):
 7 |     assert jda.ping()
 8 | 
 9 | def test_get_folders(jda: JoplinDataAPI):
10 |     folders, has_more, next_page = jda.get_folders(limit=1)
11 |     assert has_more == True
12 |     # 获取所有的 Folder
13 |     folders, has_more, next_page = jda.get_folders(limit=1, page=0)
14 |     assert has_more == False
15 | 
@pytest.mark.skip
def test_get_folder(jda: JoplinDataAPI):
    """Fetch one folder by id and check that the same id comes back."""
    # Alternative id of a scratch folder: 'f02beb3e93f4456ea4032613b9a9575d'
    folder_id = '467852588872421b939824efdbc26266'
    assert jda.get_folder(folder_id).id == folder_id
23 | 
24 |     
@pytest.mark.skip
def test_post_folders(jda: JoplinDataAPI):
    """Create a folder through the API and print the result (no assertion)."""
    created = jda.post_folders(title='创新新的folder')
    print(created)
29 | 


--------------------------------------------------------------------------------
/tests/test_wiz.py:
--------------------------------------------------------------------------------
 1 | from w2j.wiz import WizStorage
 2 | from pathlib import Path
 3 | import pytest
 4 | 
 5 | 
 6 | def test_datadir(wiz_user_id: str, ws: WizStorage):
 7 |     wiznote_dir = Path('~/.wiznote').expanduser()
 8 |     assert str(ws.data_dir.data_dir.resolve()) == str(wiznote_dir.joinpath(wiz_user_id).joinpath('data').resolve())
 9 | 
10 | 
@pytest.mark.skip
def test_build_tags(ws: WizStorage):
    """Building tags must yield a non-empty tag dictionary."""
    _tags, tags_dict = ws._build_tags()
    tag_keys = tags_dict.keys()
    assert len(tag_keys) > 0
17 | 
18 | 
@pytest.mark.skip
def test_build_attachments(ws: WizStorage):
    """Check the attachment collections built by the storage.

    There are normally more attachments than documents that contain
    attachments, because many documents hold more than one.
    """
    all_attachments, by_document = ws._build_attachments()
    attachment_count = len(all_attachments)
    document_count = len(by_document)
    assert attachment_count > document_count
26 |     
27 | 
@pytest.mark.skip
def test_build_documents(ws: WizStorage):
    """Every document row in the data dir must produce one built document."""
    built = ws.build_documents()
    rows = ws.data_dir._get_all_document()
    assert len(built) == len(rows)
35 | 
36 | 
@pytest.mark.skip
def test_build_document(ws: WizStorage):
    """Build a single document by guid and check its title.

    The test previously built a different guid (``more_images``) while
    asserting the title that the original comments attach to
    ``two_open_attachment``; the guid and the asserted title now refer to the
    same document.
    """
    # Other sample guids from the original test, kept for manual debugging:
    #   '49c21d80-dc3f-47d6-b37b-7b5602133600'  no attachment, one tag
    #   '32321691-f842-4cf2-8a1a-e9f3f1212a42'  2021-02-1weeks.md
    #   'f38c347c-17cb-4342-a063-861876f70660'  linux 技巧：使用 screen 管理你的远程会话
    #   'cc18a030-7445-43ad-939a-ad9e264da8d7'  more than 10 images

    # 'Flash向量-8-球和角', which has one attachment
    two_open_attachment = '44fba993-8f62-4eef-a7db-5f8b332d95d3'

    document = ws.build_document(two_open_attachment)
    assert document.title == 'Flash向量-8-球和角'


--------------------------------------------------------------------------------
/w2j/__init__.py:
--------------------------------------------------------------------------------
 1 | ##############################
 2 | # w2j =  Wiznote to Joplin
 3 | #
 4 | # https://github.com/zrong/wiz2joplin
 5 | ##############################
 6 | 
 7 | import logging
 8 | import sys
 9 | from pathlib import Path
10 | import argparse
11 | 
# NOTE(review): `__autho__` looks like a typo for `__author__`; left unchanged
# because it is a module-level name.
__autho__ = 'zrong'
__version__ = '0.4'

# Default working directory and logger. The file handler below is created at
# import time and targets `w2j.log` in the current working directory.
work_dir = Path.cwd()
logger = logging.Logger('w2j')
log_file = work_dir.joinpath('w2j.log')
log_handler = logging.FileHandler(log_file)
log_handler.setFormatter(logging.Formatter('{asctime} - {funcName} - {message}', style='{'))
# logger.addHandler(logging.StreamHandler(sys.stderr))
logger.addHandler(log_handler)


# Command-line interface definition.
# NOTE(review): parse_args() runs when the package is imported, so importing
# `w2j` from anything other than the CLI also parses sys.argv.
parser = argparse.ArgumentParser('w2j', description='Migrate from WizNote to Joplin.')
parser.add_argument('--output', '-o', type=str, metavar='OUTPUT', required=True, help='The output dir for unziped WizNote file and log file. e.g. ~/wiz2joplin_output or C:\\Users\\zrong\\wiz2joplin_output')
parser.add_argument('--wiz-dir', '-w', type=str, metavar='WIZNOTE_DIR', required=True, help='Set the data dir of WizNote. e.g ~/.wiznote or C:\\Program Files\\WizNote')
parser.add_argument('--wiz-user', '-u', type=str, metavar='WIZNOTE_USER_ID', required=True, help='Set your user id(login email) of WizNote.')
parser.add_argument('--joplin-token', '-t', type=str, metavar='JOPLIN_TOKEN', required=True, help='Set the authorization token to access Joplin Web Clipper Service.')
parser.add_argument('--joplin-host', '-n', type=str, metavar='JOPLIN_HOST', default='127.0.0.1', help='Set the host of your Joplin Web Clipper Service, default is 127.0.0.1')
parser.add_argument('--joplin-port', '-p', type=int, metavar='JOPLIN_PORT', default=41184, help='Set the port of your Joplin Web Clipper Service, default is 41184')
parser.add_argument('--location', '-l', type=str, metavar='LOCATION', help='Convert the location of WizNote, e.g. /My Notes/. If you use the --all parameter, then skip --location parameter.')
parser.add_argument('--location-children', '-r', action='store_true', help='Use with --location parameter, convert all children location of --location.')
parser.add_argument('--all', '-a', action='store_true', help='Convert all documents of your WizNote.')
args = parser.parse_args()
35 | 
36 | 
37 | from . import wiz
38 | from . import joplin
39 | from . import adapter
40 | 
41 | __all__ = ['wiz', 'joplin', 'adapter']
42 | 
43 | 
def main() -> None:
    """Entry point of the ``w2j`` command.

    Validates the parsed command-line arguments, redirects logging to
    ``w2j.log`` inside the output directory, then converts either every
    document (``--all``) or a single location (``--location``, optionally with
    its children) from WizNote to Joplin.

    Prints a message and returns early when neither ``--location`` nor
    ``--all`` is given, or when the WizNote directory does not exist.
    """
    if args.location is None and not args.all:
        print('Please set --location to assign the location of WizNote, or use --all to convert all of the documents!')
        return
    wiznote_dir = Path(args.wiz_dir).expanduser()
    if not wiznote_dir.exists():
        print(f'The wiznote directory {wiznote_dir} is not exists!')
        return
    output_dir = Path(args.output).expanduser()
    # Create missing parent directories too, and tolerate an existing directory.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Detach the import-time handler itself. The previous code passed the log
    # file *path* to removeHandler, which is a no-op, so log records kept
    # going to the w2j.log in the current working directory as well.
    logger.removeHandler(log_handler)
    log_handler.close()

    newlog_file = output_dir.joinpath('w2j.log')
    print(f'Please read [{newlog_file.resolve()}] to check the conversion states.')
    new_handler = logging.FileHandler(newlog_file)
    # Keep the record format that was configured for the original handler.
    new_handler.setFormatter(log_handler.formatter)
    logger.addHandler(new_handler)

    jda = joplin.JoplinDataAPI(
        host=args.joplin_host,
        port=args.joplin_port,
        token=args.joplin_token
    )
    ws = wiz.WizStorage(args.wiz_user, wiznote_dir, is_group_storage=False, work_dir=output_dir)
    ad = adapter.Adapter(ws, jda, work_dir=output_dir)
    if args.all:
        ad.sync_all()
    else:
        ad.sync_note_by_location(args.location, args.location_children)


--------------------------------------------------------------------------------
/w2j/__main__.py:
--------------------------------------------------------------------------------
 1 | ##############################
 2 | # w2j =  Wiznote to Joplin
 3 | #
 4 | # python w2j
 5 | # or
 6 | # python -m w2j
 7 | ##############################
 8 | 
 9 | import sys
10 | import os
11 | 
# When no package context is set (the header above mentions running
# ``python w2j``), put the parent directory first on sys.path so that
# ``import w2j`` below can be resolved.
if not __package__:
    path = os.path.join(os.path.dirname(__file__), os.pardir)
    sys.path.insert(0, path)

# Import the package and hand over to its command-line entry point.
import w2j
w2j.main()


--------------------------------------------------------------------------------
/w2j/adapter.py:
--------------------------------------------------------------------------------
  1 | ##############################
  2 | # w2j.adapter
  3 | #
  4 | # 适配器，将解析后的为知笔记对象装备成 joplin 笔记对象
  5 | ##############################
  6 | 
  7 | from pathlib import Path
  8 | from typing import Optional, Union
  9 | import json
 10 | import sqlite3
 11 | 
 12 | from w2j import logger, work_dir as default_work_dir
 13 | from w2j.wiz import WizDocument, WizAttachment, WizImage, WizInternalLink, WizTag, WizStorage
 14 | from w2j.joplin import JoplinNote, JoplinFolder, JoplinResource, JoplinTag, JoplinDataAPI
 15 | from w2j.parser import tojoplinid, towizid, convert_joplin_body, JoplinInternalLink
 16 | 
 17 | 
 18 | class Location2Folder(object):
 19 |     """ 为知笔记的 Location 与 Joplin 的 Folder 之转换关系
 20 |     """
 21 |     # 为知笔记的全路径名称（包含 / 的所有部分）
 22 |     location: str
 23 | 
 24 |     # 当前目录的名称
 25 |     title: str
 26 | 
 27 |     # 为知笔记的父全路径名称
 28 |     parent_location: str
 29 | 
 30 |     # 1/2/3 来表示当前  Folder 处于第几级，顶级为 level1
 31 |     level: int
 32 | 
 33 |     # Joplin Folder guid，只有创建之后才会存在
 34 |     id: str
 35 | 
 36 |     # 父 Joplin Folder guid
 37 |     parent_id: str
 38 | 
 39 |     def __init__(self, location: str, title: str = None, parent_location: str = None, level: int = 0, id: str = None, parent_id: str = None, **kwargs) -> None:
 40 |         self.location = location
 41 | 
 42 |         if title is None:
 43 |             # 去掉头尾的 / 后使用 / 分隔
 44 |             titles = location[1:-1].split('/')
 45 | 
 46 |             self.level = len(titles)
 47 |             # 最后一个是当前目录
 48 |             self.title = titles[-1]
 49 |             # 有父目录
 50 |             if self.level > 1:
 51 |                 self.parent_location = '/' + '/'.join(titles[:-1]) + '/'
 52 |             else:
 53 |                 self.parent_location = None
 54 |         else:
 55 |             self.title = title
 56 |             self.parent_location = parent_location
 57 |             self.level = level
 58 | 
 59 |         self.id = id
 60 |         self.parent_id = parent_id
 61 | 
 62 |     def __conform__(self, protocol) -> str:
 63 |         if protocol is sqlite3.PrepareProtocol:
 64 |             return f'{self.location};{self.title};{self.parent_location};{self.level};{self.id};{self.parent_id}'
 65 |         return ''
 66 | 
 67 |     def __repr__(self) -> str:
 68 |         return f'<Location2Folder id: {self.id}, title: {self.title}, location: {self.location}, level: {self.level}, parent_location: {self.parent_location}>'
 69 | 
 70 | 
 71 | class ConvertUtil():
 72 |     """ 处理转换的中间过程
 73 |     """
 74 |     # 转换过程中的专用数据库连接
 75 |     conn: sqlite3.Connection
 76 | 
 77 |     # lzf_db 的内容写入 json 文件中，避免每次都要重新生成 Folder，造成重复
 78 |     db_file: Path
 79 | 
 80 |     CREATE_SQL: dict[str, str] = {
 81 |         # 保存 Location 和 Folder 的关系
 82 |         'l2f': """CREATE TABLE l2f (
 83 |                 location TEXT NOT NULL,
 84 |                 id TEXT,
 85 |                 title TEXT NOT NULL,
 86 |                 parent_location TEXT,
 87 |                 parent_id TEXT,
 88 |                 level INTEGER NOT NULL,
 89 |                 PRIMARY KEY (location)
 90 |             );""",
 91 |         # 处理过的文档会保存在这里，在这个表中能找到的文档说明已经转换成功了
 92 |         'note': """CREATE TABLE note (
 93 |                 note_id TEXT not NULL,
 94 |                 title TEXT not NULL,
 95 |                 joplin_folder TEXT NOT NULL,
 96 |                 markup_language INTEGER NOT NULL,
 97 |                 wiz_location TEXT NOT NULL,
 98 |                 PRIMARY KEY (note_id)
 99 |             );""",
100 |         # 处理过的资源保存在这里，包括 image 和 attachment 资源
101 |         'resource': """CREATE TABLE resource (
102 |                 resource_id TEXT not NULL,
103 |                 title TEXT NOT NULL,
104 |                 filename TEXT NOT NULL,
105 |                 created_time INTEGER not NULL,
106 |                 resource_type INTEGER NOT NULL,
107 |                 PRIMARY KEY (resource_id)
108 |             );""",
109 |         # 保存为知笔记中的内链，也就是 resource 与 note 的关系，使用 文档 guid 和 连接目标 guid 同时作为主键。链接目标 guid 为 joplin 格式
110 |         'internal_link': """
111 |             CREATE TABLE internal_link (
112 |                 note_id TEXT not NULL,
113 |                 resource_id TEXT not NULL,
114 |                 title TEXT not NULL,
115 |                 link_type TEXT NOT NULL,
116 |                 PRIMARY KEY (note_id, resource_id)
117 |             );
118 |             CREATE INDEX idx_link_type ON internal_link (link_type);
119 |             CREATE INDEX idx_resource_id ON internal_link (resource_id);
120 |             """,
121 |         # 保存为知笔记中的 tag
122 |         'tag': """
123 |             CREATE TABLE tag (
124 |                 tag_id TEXT not NULL,
125 |                 title TEXT not NULL,
126 |                 created_time INTEGER not NULL,
127 |                 updated_time INTEGER not NULL,
128 |                 PRIMARY KEY (tag_id)
129 |             );
130 |             CREATE UNIQUE INDEX idx_title ON tag (title);
131 |         """,
132 |         # 保存tag 与note 的关系
133 |         'note_tag': """CREATE TABLE note_tag (
134 |             note_id TEXT not NULL,
135 |             tag_id TEXT not NULL,
136 |             title TEXT not NULL,
137 |             created_time INTEGER not NULL,
138 |             PRIMARY KEY (note_id, tag_id)
139 |         );""",
140 |     }
141 | 
142 |     # 目录最大的级别
143 |     folder_max_level: int = 0
144 | 
145 |     # 将为知笔记转换到 Joplin 目录的结果存储到 dict 中
146 |     l2f_cache: dict[str, Location2Folder]
147 | 
148 |     folders: dict[str, JoplinFolder]
149 |     tag: dict[str, JoplinTag]
150 |     notes: dict[str, JoplinNote]
151 |     resources: dict[str, JoplinResource]
152 |     internal_links: dict[str, JoplinInternalLink]
153 | 
154 |     def __init__(self, db_file: Path) -> None:
155 |         self.db_file = db_file
156 |         self.init_db()
157 | 
158 |     def init_db(self):
159 |         """ 创建数据库
160 |         """
161 |         self.conn = sqlite3.connect(self.db_file)
162 |         test_table = "SELECT count(*) FROM sqlite_master WHERE type='table' AND name=?;"
163 | 
164 |         for table in ('l2f', 'note', 'resource', 'internal_link', 'tag', 'note_tag'):
165 |             table_exists = self.conn.execute(test_table, (table, )).fetchone()[0]
166 |             logger.info(f'表 {table} 是否存在: {table_exists}')
167 |             if not table_exists:
168 |                 self.conn.executescript(self.CREATE_SQL[table])
169 | 
170 |     def init_cache(self, documents: list[WizDocument]):
171 |         # 下面的顺序需要严格保持
172 |         # 将 location 转换成 folder
173 |         self.convert_l2f(documents)
174 |         self.load_folders()
175 |         self.load_tags()
176 |         self.load_resources()
177 |         self.load_internal_links()
178 |         self.load_notes()
179 | 
180 |     def close(self):
181 |         self.conn.close()
182 | 
183 |     def build_location_to_top(self, location: str, document: Optional[WizDocument] = None):
184 |         """ 构建一个 location 直到最顶端，并返回这个 location 对应的 l2f 对象
185 |         """
186 |         l2f_inst = self.l2f_cache.get(location)
187 |         if l2f_inst is None:
188 |             l2f_inst = Location2Folder(location)
189 |             self.l2f_cache[location] = l2f_inst
190 |             self.conn.execute(
191 |                 'INSERT INTO l2f(location, title, parent_location, level, id, parent_id) VALUES (:location, :title, :parent_location, :level, :id, :parent_id)',
192 |                 vars(l2f_inst)
193 |             )
194 |             self.conn.commit()
195 |         if l2f_inst is not None and l2f_inst.parent_location is not None:
196 |             # 递归调用时，不传递 document
197 |             self.build_location_to_top(l2f_inst.parent_location, None)
198 |         # 仅当创建「最低端 folder」的时候才会更新 document 中的引用
199 |         if document is not None:
200 |             document.folder = l2f_inst
201 |         # 获取最大的 level
202 |         if l2f_inst.level > self.folder_max_level:
203 |             self.folder_max_level = l2f_inst.level
204 | 
205 |     def convert_l2f(self, documents: list[WizDocument]) -> None:
206 |         """ 将为知笔记中的所有 location 转换成中间格式，等待生成 Joplin Folder
207 |         """
208 |         sql = 'SELECT location, title, parent_location, level, id, parent_id FROM l2f;'
209 |         l2f_items = self.conn.execute(sql).fetchall()
210 |         logger.info(f'在数据库 l2f 中找到 {len(l2f_items)} 条记录。')
211 | 
212 |         # 用 location 作为唯一 key
213 |         self.l2f_cache = {}
214 |         for l2f_item in l2f_items:
215 |             self.l2f_cache[l2f_item[0]] = Location2Folder(*l2f_item)
216 | 
217 |         for document in documents:
218 |             self.build_location_to_top(document.location, document)
219 | 
220 |     def get_folder(self, id: str=None, location: str=None) -> JoplinFolder:
221 |         """ 根据 id 或者 location 获取一个 Joplin Folder
222 |         """
223 |         if id:
224 |             return self.folders.get(id)
225 |         elif location:
226 |             l2f = self.l2f_cache.get(location)
227 |             if l2f is not None:
228 |                 return self.folders.get(l2f.id)
229 |         return None
230 | 
231 |     def get_tags(self, guid: str) -> dict[str, JoplinTag]:
232 |         """ 根据 guid 获取该 note 的所有 tag
233 |         """
234 |         sql = 'SELECT tag_id, title FROM note_tag WHERE note_id=?;'
235 |         items = self.conn.execute(sql, (guid, )).fetchall()
236 |         logger.info(f'在数据库 note_tag 中找到 note {guid} 的 {len(items)} 条 tag 记录。')
237 |         tag_dict: dict[str, JoplinTag] = {}
238 |         for item in items:
239 |             tag_id = item[1]
240 |             tag_dict[tag_id] = self.tags[tag_id]
241 |         return tag_dict
242 | 
243 |     def get_resources(self, links: dict[str, JoplinInternalLink]) -> dict[str, JoplinResource]:
244 |         """ 根据内链获取对应的 resource
245 |         """
246 |         resource_dict: dict[str, JoplinResource] = {}
247 |         for jil in links.values():
248 |             resource = self.resources.get(jil.resource_id)
249 |             if resource:
250 |                 resource_dict[jil.resource_id] = resource
251 |         return resource_dict
252 | 
253 |     def get_internal_links(self, guid: str) -> dict[str, JoplinInternalLink]:
254 |         sql = 'SELECT note_id, resource_id, title, link_type FROM internal_link WHERE note_id=?;'
255 |         items = self.conn.execute(sql, (guid, )).fetchall()
256 |         logger.info(f'在数据库 internal_link 中找到 note {guid} 的 {len(items)} 条内链记录。')
257 |         links = {}
258 |         for item in items:
259 |             # 优先从缓存中获取 jil 对象
260 |             id = f'{item[0]}-{item[1]}'
261 |             jil: JoplinInternalLink = self.internal_links.get(id, JoplinInternalLink(*item))
262 |             links[id] = jil
263 |         return links
264 | 
265 |     def get_note(self, note_id: str) -> JoplinNote:
266 |         return self.notes.get(note_id)
267 | 
268 |     def load_folders(self) -> None:
269 |         """ 将数据库中的 JoplinFolder 载入
270 |         数据库中保存的是  Location2Folder 对象，将其转换成 JoplinFolder
271 |         """
272 |         self.folders = {}
273 |         for l2f in self.l2f_cache.values():
274 |             self.folders[l2f.id] = JoplinFolder(l2f.id, l2f.title, 0, 0, l2f.parent_id)
275 | 
276 |     def load_tags(self) -> None:
277 |         """ 从数据库中载入已经创建的 tag 信息
278 |         """
279 |         sql = 'SELECT tag_id, title, created_time, updated_time FROM tag;'
280 |         tag_items = self.conn.execute(sql).fetchall()
281 |         logger.info(f'在数据库 tag 中找到 {len(tag_items)} 条记录。')
282 |         self.tags = {}
283 |         for tag_item in tag_items:
284 |             self.tags[tag_item[0]] = JoplinTag(*tag_item)
285 | 
286 |     def load_resources(self) -> None:
287 |         sql = 'SELECT resource_id, title, filename, created_time, resource_type FROM resource;'
288 |         items = self.conn.execute(sql).fetchall()
289 |         logger.info(f'在数据库 resource 中找到 {len(items)} 条记录。')
290 |         self.resources = {}
291 |         for item in items:
292 |             jr = JoplinResource(*item)
293 |             self.resources[jr.id] = jr
294 | 
295 |     def load_notes(self) -> None:
296 |         """ 从数据库中载入已经同步的 note
297 |         """
298 |         sql = 'SELECT note_id, title, joplin_folder, markup_language, wiz_location FROM note;'
299 |         items = self.conn.execute(sql).fetchall()
300 |         logger.info(f'在数据库 note 中找到 {len(items)} 条记录。')
301 |         self.notes = {}
302 |         for item in items:
303 |             jn = JoplinNote(item[0], item[1], item[2], item[3], location=item[4])
304 |             jn.folder = self.folders[jn.parent_id]
305 |             jn.internal_links = self.get_internal_links(jn.id)
306 |             jn.resources = self.get_resources(jn.internal_links)
307 |             jn.tags = self.get_tags(jn.id)
308 |             self.notes[jn.id] = jn
309 | 
310 |     def load_internal_links(self) -> None:
311 |         sql = 'SELECT note_id, resource_id, title, link_type FROM internal_link;'
312 |         items = self.conn.execute(sql).fetchall()
313 |         logger.info(f'在数据库 internal_link 中找到 {len(items)} 条内链记录。')
314 |         self.internal_links = {}
315 |         for item in items:
316 |             jil: JoplinInternalLink = JoplinInternalLink(*item)
317 |             self.internal_links[jil.id] = jil
318 | 
319 |     def add_tag(self, tag: JoplinTag) -> None:
320 |         """ 向数据库中加入一个没有创建过的 tag
321 |         """
322 |         if self.tags.get(tag.id) is not None:
323 |             logger.warning(f'tag {tag.id} |{tag.title}| 已经存在，不需要新增。')
324 |             return
325 |         sql = 'INSERT INTO tag (tag_id, title, created_time, updated_time) VALUES (?, ?, ?, ?);'
326 |         self.conn.execute(sql, (tag.id, tag.title, tag.created_time, tag.updated_time))
327 |         self.tags[tag.id] = tag
328 |         self.conn.commit()
329 | 
330 |     def add_resource(self, jr: JoplinResource) -> None:
331 |         """ 向数据库中加入一个没有创建过的 resource
332 |         """
333 |         if self.resources.get(jr.id) is not None:
334 |             logger.warning(f'resource {jr.id} |{jr.title}| 已经存在，不需要新增。')
335 |             return
336 |         sql = 'INSERT INTO resource (resource_id, title, filename, created_time, resource_type) VALUES (?, ?, ?, ?, ?);'
337 |         self.conn.execute(sql, (jr.id, jr.title, jr.filename, jr.created_time, jr.resource_type))
338 |         self.resources[jr.id] = jr
339 |         self.conn.commit()
340 | 
341 |     def add_internal_lnk(self, jil: JoplinInternalLink) -> None:
342 |         if self.internal_links.get(jil.id) is not None:
343 |             logger.warning(f'internal_link {jil.id} |{jil.title}-{jil.link_type}| 已经在数据库中存在，不需要新增。')
344 |             return
345 |         sql = 'INSERT INTO internal_link (note_id, resource_id, title, link_type) VALUES (?, ?, ?, ?);'
346 |         self.conn.execute(sql, (jil.note_id, jil.resource_id, jil.title, jil.link_type))
347 |         self.internal_links[jil.id] = jil
348 |         self.conn.commit()
349 | 
350 |     def add_note_tag(self, note: JoplinNote, tag: JoplinTag) -> None:
351 |         """ 增加一个 note 的 tag
352 |         """
353 |         test_note_tag = "SELECT count(*) FROM note_tag WHERE note_id=? AND tag_id=?;"
354 |         note_tag_item = self.conn.execute(test_note_tag, (note.id, tag.id)).fetchone()
355 |         if note_tag_item:
356 |             logger.warning(f'note {note.id}|{note.title}| 的 tag {tag.id}|{tag.title}| 已经存在！')
357 |             return
358 |         sql = 'INSERT INTO note_tag (note_id, tag_id, title, created_time) VALUES (?, ?, ?, ?);'
359 |         self.conn.execute(sql, (note.id, tag.id, tag.title, tag.created_time))
360 |         self.conn.commit()
361 | 
362 |     def add_note(self, note: JoplinNote) -> None:
363 |         if self.notes.get(note.id) is not None:
364 |             logger.warning(f'note {note.id} |{note.title}| 已经在数据库中存在，不需要新增。')
365 |             return
366 |         sql = 'INSERT INTO note (note_id, title, joplin_folder, markup_language, wiz_location) VALUES (?, ?, ?, ?, ?);'
367 |         self.conn.execute(sql, (note.id, note.title, note.parent_id, note.markup_language, note.location))
368 |         self.conn.commit()
369 | 
370 |         self.notes[note.id] = note
371 |         for tag in note.tags.values():
372 |             self.add_note_tag(note, tag)
373 |         for jil in note.internal_links.values():
374 |             self.add_internal_lnk(jil)
375 | 
376 |     def update_l2f(self, location: str, id: str, parent_id: Optional[str] = None):
377 |         """ 更新 Folder 的 guid 到 l2f 对象中
378 |         每次更新都写入 db
379 |         """
380 |         l2f_inst = self.l2f_cache[location]
381 |         l2f_inst.id = id
382 |         if parent_id is not None:
383 |             l2f_inst.parent_id = parent_id
384 |         self.conn.execute(
385 |             'UPDATE l2f SET parent_id=:parent_id, id=:id WHERE location=:location',
386 |             vars(l2f_inst)
387 |         )
388 |         self.conn.commit()
389 | 
390 |     def get_waiting_for_created_l2f(self) -> list[Location2Folder]:
391 |         """ 按照 level 排序并返回 l2f 对象，level 低的必须先创建
392 |         """
393 |         waiting_for_created = [v for v in self.l2f_cache.values() if v.id is None]
394 |         waiting_for_created.sort(key=lambda l2f: l2f.level)
395 |         return waiting_for_created
396 | 
397 | 
class Adapter(object):
    """Convert WizNote objects into the matching Joplin objects and upload them."""

    ws: WizStorage
    jda: JoplinDataAPI
    work_dir: Path
    cu: ConvertUtil

    def __init__(self, ws: WizStorage, jda: JoplinDataAPI, work_dir: Path=None) -> None:
        """Resolve the WizNote storage and load the conversion caches.

        :param ws: WizNote storage to read from
        :param jda: Joplin Data API client to write to
        :param work_dir: directory holding ``w2j.sqlite``; the package default
            is used when omitted
        """
        self.ws = ws
        self.jda = jda
        self.work_dir = work_dir or default_work_dir

        # Parse all documents.
        self.ws.resolve()

        # Load the caches from the conversion database.
        self.cu = ConvertUtil(self.work_dir.joinpath('w2j.sqlite'))
        self.cu.init_cache(self.ws.documents)

    def sync_folders(self) -> None:
        """Create a Joplin folder for every WizNote location that has none yet.

        In WizNote a directory is not a resource of its own: it is defined in
        the configuration and only shows up as the ``location`` field in the
        database. In Joplin a folder is a regular item, see
        https://joplinapp.org/api/references/rest_api/#item-type-ids

        :raises ValueError: when the parent of a location is unknown or has
            not received a Joplin id
        """
        waiting_created_l2f = self.cu.get_waiting_for_created_l2f()
        logger.info(f'有 {len(waiting_created_l2f)} 个 folder 等待同步。')
        for l2f in waiting_created_l2f:
            logger.info(f'处理 location {l2f.location}')
            # Level 1 folders have no parent.
            if l2f.parent_location is None:
                jf = self.jda.post_folder(title=l2f.title)
                self.cu.update_l2f(l2f.location, jf.id)
                continue

            parent_l2f: Location2Folder = self.cu.l2f_cache.get(l2f.parent_location)
            if parent_l2f is None:
                msg = f'找不到父对象 {l2f.parent_location}！'
                logger.error(msg)
                raise ValueError(msg)
            if parent_l2f.id is None:
                msg = f'父对象 {l2f.parent_location} 没有 id！'
                logger.error(msg)
                raise ValueError(msg)
            jf = self.jda.post_folder(title=l2f.title, parent_id=parent_l2f.id)
            self.cu.update_l2f(l2f.location, jf.id, jf.parent_id)
        # l2f_cache changed, so the folder cache has to be rebuilt.
        self.cu.load_folders()

    def sync_tags(self) -> None:
        """Create a Joplin tag for every WizNote tag that is not recorded yet."""
        created_keys = self.cu.tags.keys()
        waiting_create_tags = [wt for wt in self.ws.tags if tojoplinid(wt.guid) not in created_keys]
        logger.info(f'为知笔记共有 {len(self.ws.tags)} 个 tag 。')
        logger.info(f'有 {len(waiting_create_tags)} 个 tag 等待同步。')
        for wt in waiting_create_tags:
            tag_id = tojoplinid(wt.guid)
            try:
                logger.info(f'处理 tag {wt.name} {tag_id}')
                jt = self.jda.post_tag(id=tag_id, title=wt.name, created_time=wt.modified, updated_time=wt.modified)
                self.cu.add_tag(jt)
            except ValueError as e:
                logger.error(e)
                # A duplicate-guid error means the tag exists in Joplin but
                # was never written to the conversion database; record it now.
                if 'SQLITE_CONSTRAINT: UNIQUE constraint failed' in str(e):
                    jt = self.jda.get_tag(tag_id)
                    self.cu.add_tag(jt)
                continue

    def _upload_wiz_attachment(self, attach: WizAttachment) -> JoplinResource:
        """Upload a WizNote attachment as a Joplin resource.

        :returns: the new resource, or the cached one when the attachment was
            uploaded before
        """
        resource_id = tojoplinid(attach.guid)
        jr: JoplinResource = self.cu.resources.get(resource_id)
        if jr is not None:
            logger.warning(f'resource {resource_id} |{jr.title}|已经存在！')
            # Hand back the known resource: the caller dereferences the
            # result and would fail on None.
            return jr
        jr = self.jda.post_resource(
            attach.file,
            1,
            id=resource_id,
            title=attach.name,
            filename=attach.name,
            created_time=attach.modified,
            updated_time=attach.modified
        )
        self.cu.add_resource(jr)
        return jr

    def _upload_wiz_image(self, image: WizImage) -> JoplinResource:
        """Upload a WizNote image as a Joplin resource and record it."""
        jr: JoplinResource = self.jda.post_resource(
            image.file,
            2,
            title=image.src,
            filename=image.src
        )
        self.cu.add_resource(jr)
        return jr

    def _sync_note(self, document: WizDocument) -> JoplinNote:
        """Convert one WizNote document and create it in Joplin.

        :returns: the created note, or the already recorded note when the
            document was synchronised before
        """
        logger.info(f'正在处理 document {document.guid}|{document.title}|。')
        note_id = tojoplinid(document.guid)
        jn: JoplinNote = self.cu.get_note(note_id)
        if jn is not None:
            logger.warning(f'note {jn.id} |{jn.title}| 已经存在！')
            return jn

        # Joplin resources created for the images and attachments of this note.
        resources_in_note: dict[str, JoplinResource] = {}

        # Images are never part of the WizNote internal links and attachments
        # may be missing from them too, so all of them are collected here.
        joplin_internal_links: dict[str, JoplinInternalLink] = {}

        # Internal links that the document already contains.
        for wil in document.internal_links:
            resource_id = tojoplinid(wil.guid)
            jil: JoplinInternalLink = JoplinInternalLink(note_id, resource_id, wil.title, wil.link_type, wil.outerhtml)
            joplin_internal_links[jil.id] = jil

        # Upload the attachments.
        for attachment in document.attachments:
            jr: JoplinResource = self._upload_wiz_attachment(attachment)
            resources_in_note[jr.id] = jr

            jil_id = f'{note_id}-{jr.id}'
            if joplin_internal_links.get(jil_id) is not None:
                logger.warning(f'内链关系 {jil_id} 已存在！')
                continue

            # The attachment is listed on the document but not referenced in
            # the body, so there is no outerhtml; the link is created without
            # one and handed to the body conversion.
            jil = JoplinInternalLink(note_id, jr.id, jr.title, 'open_attachment')
            joplin_internal_links[jil.id] = jil

        # Upload the images; each image of the document becomes a resource.
        for image in document.images:
            jr = self._upload_wiz_image(image)
            resources_in_note[jr.id] = jr
            jil = JoplinInternalLink(note_id, jr.id, jr.title, 'image', image.outerhtml)
            joplin_internal_links[jil.id] = jil

        # Build the Joplin body from the WizNote body and the collected links.
        body = convert_joplin_body(
            document.body,
            document.is_markdown,
            joplin_internal_links.values()
        )

        folder = self.cu.get_folder(location=document.location)
        note: JoplinNote = self.jda.post_note(note_id, document.title, body, document.is_markdown, folder.id, document.url)
        note.internal_links = joplin_internal_links
        note.resources = resources_in_note
        note.folder = folder
        # NOTE(review): the tags are read back from the conversion database,
        # which is itself only filled from note.tags in add_note; the tags of
        # the WizNote document do not appear to reach the note - confirm.
        note.tags = self.cu.get_tags(note.id)
        self.cu.add_note(note)

        return note

    def _get_locations(self, location: str, locations: list[str]) -> None:
        """Append every location below ``location`` to ``locations``.

        Descendants are appended depth first.

        :raises ValueError: when ``location`` is not in the location cache
        """
        cur_l2f = self.cu.l2f_cache.get(location)
        if cur_l2f is None:
            raise ValueError(f'找不到 {location}')
        for l2f in self.cu.l2f_cache.values():
            if l2f.parent_location and l2f.level > cur_l2f.level and l2f.parent_location == location:
                locations.append(l2f.location)
                self._get_locations(l2f.location, locations)

    def sync_note_by_location(self, location: str, with_children: bool=True) -> None:
        """Synchronise every note stored in the WizNote directory ``location``.

        :param with_children: also synchronise the notes of all directories
            below ``location``
        """
        self.sync_folders()
        self.sync_tags()
        locations = [location]
        if with_children:
            self._get_locations(location, locations)
        logger.info(f'处理以下 location： {locations}')
        wanted = set(locations)
        waiting_for_sync = [wd for wd in self.ws.documents if wd.location in wanted]
        logger.info(f'为知笔记目录 {location} 中有 {len(waiting_for_sync)} 篇笔记等待同步。')
        for wd in waiting_for_sync:
            self._sync_note(wd)

    def sync_all(self) -> None:
        """Synchronise folders, tags and every document."""
        self.sync_folders()
        self.sync_tags()
        logger.info(f'为知笔记转换所有文档 {len(self.ws.documents)} 篇。')
        for wd in self.ws.documents:
            self._sync_note(wd)
596 | 


--------------------------------------------------------------------------------
/w2j/joplin.py:
--------------------------------------------------------------------------------
  1 | ##############################
  2 | # w2j.joplin
  3 | #
  4 | # 处理 Joplin 相关
  5 | # 结构查看 https://joplinapp.org/api/references/rest_api/
  6 | ##############################
  7 | 
  8 | 
  9 | import json
 10 | from pathlib import Path
 11 | from typing import Optional, Union
 12 | 
 13 | import httpx
 14 | import sqlite3
 15 | 
 16 | from w2j import logger
 17 | from w2j.parser import JoplinInternalLink
 18 | 
 19 | 
 20 | class JoplinFolder(object):
 21 |     """ Joplin 中的 notebook
 22 |     """
 23 |     name = 'folder'
 24 |     type_ = 2
 25 | 
 26 |     # Folder 在 Joplin 数据库中的 guid
 27 |     id: str = None
 28 | 
 29 |     # Folder 名称
 30 |     title: str = None
 31 | 
 32 |     # 创建时间戳
 33 |     created_time: int = 0
 34 | 
 35 |     # 更新时间戳
 36 |     updated_time: int = 0
 37 | 
 38 |     # 如果有父 Folder，则值为其 ID
 39 |     parent_id: str = None
 40 | 
 41 |     # 所有必须的 fields 名称
 42 |     fields = ['id', 'title', 'created_time', 'updated_time', 'parent_id']
 43 | 
 44 |     def __init__(self, id: str, title: str, created_time: int, updated_time: int, parent_id: Optional[str] = None, **kwargs) -> None:
 45 |         self.id = id
 46 |         self.title = title
 47 |         self.created_time = created_time
 48 |         self.updated_time = updated_time
 49 |         self.parent_id = parent_id
 50 | 
 51 |     @classmethod
 52 |     def fields_str(cls) -> str:
 53 |         return ','.join(cls.fields)
 54 | 
 55 |     def __repr__(self) -> str:
 56 |         return f'<JoplinFolder {self.id}, {self.title}, parent_id: {self.parent_id}>'
 57 | 
 58 | 
 59 | class JoplinResource(object):
 60 |     """ Joplin 中的 Resource
 61 |     """
 62 |     name = 'resource'
 63 |     type_ = 4
 64 | 
 65 |     id: str = None
 66 |     title: str = None
 67 |     filename: str = None
 68 |     file_extension: str = None
 69 |     created_time: int = None
 70 |     updated_time: int = None
 71 | 
 72 |     # 1 附件，2 文中图像
 73 |     resource_type: int
 74 | 
 75 |     # 所有必须的 fields 名称
 76 |     fields = ['id', 'title', 'created_time', 'updated_time', 'filename', 'file_extension']
 77 | 
 78 |     @classmethod
 79 |     def fields_str(cls) -> str:
 80 |         return ','.join(cls.fields)
 81 | 
 82 |     def __init__(self, id: str, title: str, filename: str, created_time: int, resource_type: int, **kwargs) -> None:
 83 |         self.id = id
 84 |         self.title = title
 85 |         self.filename = filename
 86 |         self.resource_type = resource_type
 87 |         self.created_time = created_time
 88 |         if kwargs.get('file_extension'):
 89 |             self.file_extension = kwargs.get('file_extension')
 90 |         if kwargs.get('updated_time'):
 91 |             self.updated_time = kwargs.get('updated_time')
 92 | 
 93 |     def __repr__(self) -> str:
 94 |         return f'<JoplinResource {self.id} |{self.title}|{self.resource_type} {self.filename}>'
 95 | 
 96 | 
 97 | class JoplinTag(object):
 98 |     """ Joplin 中的 tag
 99 |     """
100 |     name = 'tag'
101 |     type_ = 5
102 | 
103 |     id: str = None
104 |     title: str = None
105 |     parent_id: str = None
106 |     created_time: int = 0
107 |     updated_time: int = 0
108 | 
109 |     # 所有必须的 fields 名称
110 |     fields = ['id', 'title', 'created_time', 'updated_time']
111 | 
112 |     @classmethod
113 |     def fields_str(cls):
114 |         return ','.join(cls.fields)
115 | 
116 |     def __init__(self, id: str, title: str, created_time:int, updated_time: int = 0, **kwargs) -> None:
117 |         self.id = id
118 |         self.title = title
119 |         self.created_time = created_time
120 |         self.updated_time = created_time if updated_time == 0 else updated_time
121 | 
122 |     def __repr__(self) -> str:
123 |         return f'<JoplinTag {self.id}, {self.title}, parent_id: {self.parent_id}>'
124 | 
125 | 
class JoplinNote(object):
    """A note in Joplin."""
    # Joplin note id, 32 characters.
    id: str = None

    # Joplin note title.
    title: str = None

    # Joplin note body.
    body: str = None

    # Creation timestamp in milliseconds.
    created_time: int = None

    # Update timestamp in milliseconds.
    updated_time: int = None

    # URL of the article.
    source_url: str = None

    # 1 stands for markdown, 2 for html.
    markup_language: int = 1

    location: str = ''
    parent_id: str = ''

    # Created per instance in __init__, so that notes never share one dict.
    tags: dict[str, JoplinTag]
    resources: dict[str, JoplinResource]
    internal_links: dict[str, JoplinInternalLink]
    folder: JoplinFolder = None

    # Names of all required fields.
    fields = ['id', 'title', 'parent_id', 'created_time', 'updated_time', 'body', 'source_url', 'markup_language']

    def __init__(self, id: str, title: str, parent_id: str, markup_language: int, **kwargs) -> None:
        """Store the given values.

        ``location``, ``source_url``, ``created_time`` and ``updated_time``
        are taken from the keyword arguments when present and truthy; other
        keywords are ignored.
        """
        self.id = id
        self.title = title
        self.parent_id = parent_id
        self.markup_language = markup_language
        self.tags = {}
        self.resources = {}
        self.internal_links = {}
        for optional in ('location', 'source_url', 'created_time', 'updated_time'):
            value = kwargs.get(optional)
            if value:
                setattr(self, optional, value)

    @classmethod
    def fields_str(cls):
        """Return the required field names as one comma-separated string."""
        return ','.join(cls.fields)

    def __repr__(self) -> str:
        # The folder is a single object (or None), so it is shown as is
        # rather than measured with len().
        return f'<JoplinNote {self.id}, {self.title}, tag: {len(self.tags)}, resource: {len(self.resources)}, folder: {self.folder}>'
181 | 
182 | 
class JoplinDataAPI(object):
    """Client for the Joplin Data API (the local Web Clipper service).

    All requests go through one shared ``httpx.Client`` and carry the API
    token as the ``token`` query parameter.
    """

    host: str
    port: int
    token: str
    base_url: str

    client: httpx.Client = None

    # NOTE(review): the default token below is a credential committed to
    # source control. It is kept so existing callers keep working, but callers
    # should pass their own token.
    def __init__(self, host: str = '127.0.0.1', port: int = 41184, token: str = 'ad9b597aac8c9fa2083cb23c4354eb589b1252e6a366185d94795077ed076dfdb312c22b8640a05e5af7b784d65d831a429771e3cc2bcbe3f9cdac441d6fcca6') -> None:
        """Store the connection settings and open the HTTP client.

        :param host: host the Joplin service listens on
        :param port: port the Joplin service listens on
        :param token: API token sent with every request
        """
        self.host = host
        self.port = port
        self.token = token
        self.base_url = f'http://{self.host}:{self.port}'
        self.client = httpx.Client(base_url=self.base_url, timeout=100)

    def _build_query(self, **kwargs):
        """Build query parameters that always include the API token."""
        return httpx.QueryParams(token=self.token, **kwargs)

    @staticmethod
    def _raise_on_error(data: dict) -> dict:
        """Return ``data`` unchanged, or log and raise if it carries an error.

        :raises ValueError: when ``data`` has a truthy ``error`` entry
        """
        error = data.get('error')
        if error:
            logger.error(error)
            raise ValueError(error)
        return data

    def _check_pagination(self, page: int, paginated_resp: httpx.Response) -> tuple[list, bool, int]:
        """Unpack one paginated response.

        :param page: the page number that was requested
        :param paginated_resp: response whose JSON has ``items`` and ``has_more``
        :returns: items, has_more, next_page (``page + 1`` only when has_more)
        :raises ValueError: when the response reports an error
        """
        data = self._raise_on_error(paginated_resp.json())
        has_more = data['has_more']
        next_page = page + 1 if has_more else page
        return data['items'], has_more, next_page

    def close(self):
        """Close the underlying HTTP client."""
        self.client.close()

    def ping(self) -> bool:
        """Return True when the service answers ``/ping`` with its banner."""
        resp = self.client.get('/ping')
        return resp.text == 'JoplinClipperServer'

    def search(self, query: str, type_: str) -> httpx.Response:
        """Run a search request and return the raw response.

        :param query: search expression, sent as the ``query`` parameter
        :param type_: item type to search for, sent as ``type`` when truthy
        """
        extra = {'query': query}
        if type_:
            extra['type'] = type_
        # The original call sent neither the token nor the search arguments.
        return self.client.get('/search', params=self._build_query(**extra))

    def get_folder(self, guid: str) -> JoplinFolder:
        """Fetch one folder by guid.

        :raises ValueError: when the response reports an error
        """
        resp = self.client.get(f'/folders/{guid}', params=self._build_query())
        return JoplinFolder(**self._raise_on_error(resp.json()))

    # https://joplinapp.org/api/references/rest_api/#pagination
    # https://joplinapp.org/api/references/rest_api/#get-folders
    # order_by=updated_time&order_dir=ASC&limit=10&page=2
    def get_folders(self, order_by: str='updated_time', order_dir: str='ASC', limit: int=100, page: int=1) -> \
        tuple[ list[JoplinFolder], bool, int]:
        """Fetch folders, one page or all of them.

        :param page: a positive value fetches just that page; a value <= 0
            fetches every page starting from page 1
        :returns: joplin folder list, has_more, next_page
        :raises ValueError: when a response reports an error
        """
        fetch_all = page <= 0
        current_page = 1 if fetch_all else page
        folders: list[JoplinFolder] = []
        while True:
            query = self._build_query(
                order_by=order_by, order_dir=order_dir, page=current_page,
                limit=limit, fields=JoplinFolder.fields_str())
            resp = self.client.get('/folders', params=query)
            items, has_more, next_page = self._check_pagination(current_page, resp)
            folders.extend(JoplinFolder(**item) for item in items)
            # A single-page request stops after one round trip.
            if not (fetch_all and has_more):
                return folders, has_more, next_page
            current_page = next_page

    def get_folder_note(self, guid: str) -> list[JoplinNote]:
        """Fetch all notes inside one folder, following pagination.

        :raises ValueError: when a response reports an error
        """
        notes: list[JoplinNote] = []
        page = 1
        has_more = True
        while has_more:
            query = self._build_query(fields=JoplinNote.fields_str(), page=page)
            # The path must be an f-string, otherwise "{guid}" is sent literally.
            resp = self.client.get(f'/folders/{guid}/notes', params=query)
            items, has_more, page = self._check_pagination(page, resp)
            notes.extend(JoplinNote(**item) for item in items)
        return notes

    def post_folder(self, **kwargs) -> JoplinFolder:
        """Create a new folder from the given properties.

        :raises ValueError: when the response reports an error
        """
        query = self._build_query()
        logger.info(f'向 Joplin 增加 folder {kwargs}')
        resp = self.client.post('/folders', params=query, json=kwargs)
        return JoplinFolder(**self._raise_on_error(resp.json()))

    def post_tag(self, **kwargs) -> JoplinTag:
        """Create a new tag from the given properties.

        :raises ValueError: when the response reports an error
        """
        query = self._build_query()
        logger.info(f'向 Joplin 增加 tag {kwargs}')
        resp = self.client.post('/tags', params=query, json=kwargs)
        return JoplinTag(**self._raise_on_error(resp.json()))

    def get_tag(self, guid: str) -> JoplinTag:
        """Fetch one tag by guid.

        :raises ValueError: when the response reports an error
        """
        query = self._build_query(fields=JoplinTag.fields_str())
        resp = self.client.get(f'/tags/{guid}', params=query)
        data = resp.json()
        logger.info(f'从 Joplin 获取 tag {guid}: {data}')
        return JoplinTag(**self._raise_on_error(data))

    def post_resource(self, file: Path, resource_type: int, **kwargs) -> JoplinResource:
        """Upload a file as a new resource.

        :param file: path of the file to upload
        :param resource_type: passed through to the returned JoplinResource
        :param kwargs: resource properties, sent JSON-encoded as ``props``
        :raises ValueError: when the response reports an error
        """
        query = self._build_query()
        # Testing showed that only title and id in props take effect; the
        # other properties are ignored.
        form = {'props': json.dumps(kwargs)}
        logger.info(f'向 Joplin 增加 resource {file} {kwargs}')
        # Keep the upload inside the with-block so the handle is always closed.
        with open(file, 'rb') as fp:
            resp = self.client.post('/resources', params=query, files={'data': fp}, data=form)
        data = self._raise_on_error(resp.json())
        return JoplinResource(**data, resource_type=resource_type)

    def get_resource(self, guid: str) -> JoplinResource:
        """Fetch one resource by guid.

        :raises ValueError: when the response reports an error
        """
        query = self._build_query(fields=JoplinResource.fields_str())
        resp = self.client.get(f'/resources/{guid}', params=query)
        data = resp.json()
        logger.info(f'从 Joplin 获取 resource {guid}: {data}')
        return JoplinResource(**self._raise_on_error(data))

    def post_note(self, id: str, title: str, body: str, 
        is_markdown: bool, parent_id: str, source_url: str) -> JoplinNote:
        """Create a new note.

        A markdown body is sent as ``body``. Any other body is sent as
        ``body_html`` together with ``convert_to`` and ``source_command`` so
        that Joplin converts the HTML to markdown.

        Hidden Joplin parameters, found by capturing Joplin Web Clipper traffic:

        complete Page Html
        source_command
        {
            'name': 'completePageHtml',
            'preProcessFor': 'html'
        }
        convert_to = html

        simplified Page Html
        source_command
        {
            'name': 'simplifiedPageHtml',
        }
        convert_to = markdown

        complete page
        source_command = markdown
        {
            'name': 'completePageHtml',
            'preProcessFor': 'markdown'
        }
        convert_to = markdown

        :param source_url: only sent when truthy
        :raises ValueError: when the response reports an error
        """
        kwargs = {
            'id': id,
            'title': title,
            'parent_id': parent_id,
            'markup_language': 1,
        }
        if source_url:
            kwargs['source_url'] = source_url
        if is_markdown:
            kwargs['body'] = body
        else:
            # Let Joplin convert the HTML to markdown.
            kwargs['body_html'] = body
            kwargs['convert_to'] = 'markdown'
            kwargs['source_command'] = {
                'name': 'simplifiedPageHtml',
            }

        query = self._build_query()
        logger.info(f'向 Joplin 增加 note {kwargs}')
        resp = self.client.post('/notes', params=query, json=kwargs)
        return JoplinNote(**self._raise_on_error(resp.json()))

    def get_note(self, guid: str) -> JoplinNote:
        """Fetch one note by guid.

        :raises ValueError: when the response reports an error
        """
        query = self._build_query(fields=JoplinNote.fields_str())
        resp = self.client.get(f'/notes/{guid}', params=query)
        data = resp.json()
        logger.info(f'从 Joplin 获取 note {guid}: {data}')
        return JoplinNote(**self._raise_on_error(data))
404 | 
405 | 
class JoplinStorage(object):
    """Direct access to the data Joplin keeps on disk."""
    # Folder that holds the Joplin data
    joplin_dir: Path

    # Joplin's main database file
    db_file: Path

    def __init__(self, joplin_dir: Path) -> None:
        """Remember the Joplin folder and derive the database path from it."""
        self.joplin_dir = joplin_dir
        self.db_file = self.joplin_dir.joinpath('database.sqlite')

    def update_time(self, wiz_document_times: list[dict[str, Union[str, int]]]):
        """Overwrite the timestamps of Joplin notes with the WizNote ones.

        :param wiz_document_times: one mapping per note with the keys ``id``,
            ``created_time`` and ``updated_time`` (the named SQL parameters)
        :raises sqlite3.Error: when the update fails; nothing is committed and
            the connection is still closed
        """
        sql = "UPDATE notes SET created_time=:created_time, updated_time=:updated_time, user_created_time=:created_time, user_updated_time=:updated_time WHERE id=:id;"
        self.conn = sqlite3.connect(self.db_file)
        try:
            cursor = self.conn.executemany(sql, wiz_document_times)
            print(cursor.rowcount)
            self.conn.commit()
        finally:
            # Close even when the update raises, so the database file is
            # never left with a dangling handle.
            self.conn.close()


--------------------------------------------------------------------------------
/w2j/parser.py:
--------------------------------------------------------------------------------
  1 | ##############################
  2 | # w2j.parser
  3 | # 解析器，解析 html 源码
  4 | ##############################
  5 | 
  6 | from datetime import datetime, timezone, timedelta
  7 | from os import link
  8 | from pathlib import Path
  9 | import re
 10 | import chardet
 11 | from inscriptis import get_text
 12 | 
 13 | 
# Shared opening and closing parts of an <a> element; the closing part
# captures the link text.
RE_A_START = r'<a href="'
RE_A_END = r'">([^<]+)</a>'

# Internal links to attachments.
# Early links have no double slash:
# wiz:open_attachment?guid=8337764c-f89d-4267-bdf2-2e26ff156098
# Later links have a double slash:
# wiz://open_attachment?guid=52935f17-c1bb-45b7-b443-b7ba1b6f854e
RE_OPEN_ATTACHMENT_HREF = r'wiz:/{0,2}(open_\w+)\?guid=([a-z0-9\-]{36})'
RE_OPEN_ATTACHMENT_OUTERHTML = RE_A_START + RE_OPEN_ATTACHMENT_HREF + RE_A_END

# Internal links to documents; only the part after guid needs to be extracted.
# wiz://open_document?guid=c6204f26-f966-4626-ad41-1b5fbdb6829e&amp;kbguid=&amp;private_kbguid=69899a48-dc52-11e0-892c-00237def97cc
RE_OPEN_DOCUMENT_HREF = r'wiz:/{0,2}(open_\w+)\?guid=([a-z0-9\-]{36})&amp;kbguid=&amp;private_kbguid=([a-z0-9\-]{36})'
RE_OPEN_DOCUMENT_OUTERHTML = RE_A_START + RE_OPEN_DOCUMENT_HREF + RE_A_END


# How image files appear in the body; this form is used even in .md files.
RE_IMAGE_OUTERHTML = r'<img .*?src="(index_files/[^"]+)"[^>]*>'
 33 | 
 34 | 
 35 | class WizInternalLink(object):
 36 |     """ 嵌入 html 正文中的为知笔记内部链接，可能是笔记，也可能是附件
 37 |     """
 38 |     # 原始链接的整个 HTML 内容，包括 <a href="link....">名称</a>
 39 |     outerhtml: str = None
 40 | 
 41 |     # 链接的 title
 42 |     title: str = None
 43 | 
 44 |     # 原始链接中的资源 guid，可能是 attachemnt 或者是 document
 45 |     guid: str = None
 46 | 
 47 |     # 值为 open_attachment 或者 open_document
 48 |     link_type: str = 'open_attachment'
 49 | 
 50 |     def __init__(self, outerhtml: str, guid: str, title: str, link_type: str) -> None:
 51 |         self.outerhtml = outerhtml
 52 |         self.guid = guid
 53 |         self.title = title
 54 |         self.link_type = link_type
 55 | 
 56 |     def __repr__(self) -> str:
 57 |         return f'<WizInternalLink {self.link_type}, {self.title}, {self.guid}>'
 58 | 
 59 | 
 60 | class WizImage(object):
 61 |     """ 在为知笔记文章中包含的本地图像
 62 | 
 63 |     在为知笔记中，本地图像不属于资源，也没有自己的 guid
 64 |     """
 65 |     # 原始图像的整个 HTML 内容，包括 <img src="index_files/name.jpg">
 66 |     outerhtml: str = None
 67 | 
 68 |     # 仅包含图像的 src 部分
 69 |     src: str = None
 70 | 
 71 |     # 图像文件的 Path 对象，在硬盘上的路径
 72 |     file: Path = None
 73 | 
 74 |     def __init__(self, outerhtml: str, src: str, note_extract_dir: Path) -> None:
 75 |         self.outerhtml = outerhtml
 76 |         self.src = src
 77 |         self.file = note_extract_dir.joinpath(src)
 78 | 
 79 |         if not self.file.exists():
 80 |             raise FileNotFoundError(f'找不到文件 {self.file}！')
 81 | 
 82 |     def __repr__(self) -> str:
 83 |         return f'<WizImage {self.src}, {self.outerhtml}>'
 84 | 
 85 | 
 86 | def parse_wiz_html(note_extract_dir: Path, title: str) -> tuple[str, list[WizInternalLink], list[WizImage]]:
 87 |     """ 在为知笔记文档的 index.html 中搜索内链的附件和文档链接
 88 |     """
 89 |     index_html = note_extract_dir.joinpath('index.html')
 90 |     if not index_html.is_file:
 91 |         raise FileNotFoundError(f'主文档文件不存在！ {index_html} |{title}|')
 92 |     html_body_bytes = index_html.read_bytes()
 93 |     # 早期版本的 html 文件使用的是 UTF-16 LE(BOM) 编码保存。最新的文件是使用 UTF-8(BOM) 编码保存。要判断编码进行解析
 94 |     enc = chardet.detect(html_body_bytes)
 95 |     html_body = html_body_bytes.decode(encoding=enc['encoding'])
 96 | 
 97 |     # 去掉换行符，早期版本的 html 文件使用了 \r\n 换行符，而且会切断 html 标记。替换掉换行符方便正则
 98 |     html_body = html_body.replace('\r\n', '')
 99 |     html_body = html_body.replace('\n', '')
100 | 
101 |     internal_links: list[WizInternalLink] = []
102 | 
103 |     open_attachments = re.finditer(RE_OPEN_ATTACHMENT_OUTERHTML, html_body, re.IGNORECASE)
104 |     for open_attachement in open_attachments:
105 |         link = WizInternalLink(
106 |             open_attachement.group(0),
107 |             open_attachement.group(2),
108 |             open_attachement.group(3),
109 |             open_attachement.group(1))
110 |         internal_links.append(link)
111 | 
112 |     open_documents = re.finditer(RE_OPEN_DOCUMENT_OUTERHTML, html_body, re.IGNORECASE)
113 |     for open_document in open_documents:
114 |         link = WizInternalLink(
115 |             open_document.group(0),
116 |             open_document.group(2),
117 |             open_document.group(4),
118 |             open_document.group(1))
119 |         internal_links.append(link)
120 | 
121 |     images: list[WizImage] = []
122 |     image_match = re.finditer(RE_IMAGE_OUTERHTML, html_body, re.IGNORECASE)
123 |     for image in image_match:
124 |         img = WizImage(image.group(0), image.group(1), note_extract_dir)
125 |         images.append(img)
126 |     return html_body, internal_links, images
127 | 
128 | 
def tots(dt: str, utc_offset_hours: float = 8):
    """Convert a local ``YYYY-MM-DD HH:MM:SS`` string to a millisecond timestamp.

    The database stores local time in UTC+8, which is the default offset.

    :param dt: local date-time string in the format ``%Y-%m-%d %H:%M:%S``
    :param utc_offset_hours: UTC offset, in hours, that ``dt`` is expressed in
    :returns: milliseconds since the Unix epoch, as an int
    :raises ValueError: when ``dt`` does not match the expected format
    """
    local_tz = timezone(timedelta(hours=utc_offset_hours))
    moment = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S').replace(tzinfo=local_tz)
    return int(moment.timestamp()*1000)
133 | 
134 | 
def towizid(id: str) -> str:
    """Convert a Joplin id to the WizNote guid format.

    The id is cut after 8, 12, 16 and 20 characters and the five pieces are
    joined with dashes.
    """
    cuts = (0, 8, 12, 16, 20)
    pieces = [id[start:stop] for start, stop in zip(cuts, cuts[1:])]
    pieces.append(id[cuts[-1]:])
    return '-'.join(pieces)
144 | 
145 | 
def tojoplinid(guid: str) -> str:
    """Convert a WizNote guid to the Joplin id format by removing every dash."""
    return guid.replace('-', '')
150 | 
151 | 
class JoplinInternalLink(object):
    """An internal link inside a Joplin note.

    Unlike WizNote internal links, Joplin internal links cover attachments
    (link to a resource), images (link to a resource) and documents (link to
    a note).
    """
    note_id: str
    resource_id: str

    # image / open_attachment / open_document
    link_type: str

    # Text of the link
    title: str = None

    # Full text of the link; may be markdown or html, depending on the
    # format of the note identified by note_id
    outertext: str

    def __init__(self, note_id: str, resource_id: str, title: str, link_type: str, outertext:str='') -> None:
        """Store the link parts.

        :param link_type: one of ``image``, ``open_attachment``, ``open_document``
        :param outertext: original link text to be replaced in the body
        """
        self.note_id = note_id
        self.resource_id = resource_id
        self.title = title
        self.link_type = link_type
        self.outertext = outertext

    @property
    def id(self) -> str:
        """Return the note id and the resource id joined by a dash."""
        return f'{self.note_id}-{self.resource_id}'
177 | 
178 | 
def gen_ilstr(is_markdown: bool, jil: JoplinInternalLink) -> str:
    """Render one Joplin internal link (ilstr = internal link str).

    Markdown output is ``[title](:/id)`` with a leading ``!`` for images;
    html output is an ``<img>`` tag for images and an ``<a>`` tag otherwise.
    """
    is_image = jil.link_type == 'image'
    if not is_markdown:
        if is_image:
            return f'<img src=":/{jil.resource_id}" alt="{jil.title}">'
        return f'<a href=":/{jil.resource_id}">{jil.title}</a>'
    markdown_link = f'[{jil.title}](:/{jil.resource_id})'
    return '!' + markdown_link if is_image else markdown_link
191 | 
192 | 
def gen_end_ilstr(is_markdown: bool, jils: list[JoplinInternalLink]):
    """Render the link list appended to the bottom of a body.

    ilstr = internal link str. Markdown gets a heading followed by a bullet
    list; html gets an ``<h1>`` followed by a ``<ul>``.
    """
    rendered = [gen_ilstr(is_markdown, jil) for jil in jils]
    if is_markdown:
        bullets = '\n'.join('- ' + entry for entry in rendered)
        return '\n\n# 附件链接\n\n' + bullets
    items = ''.join(f'<li>{entry}</li>' for entry in rendered)
    return f'<br><br><h1>附件链接</h1><ul>{items}</ul>'
201 |     
202 | 
def convert_joplin_body(body: str, is_markdown: bool, internal_links: list[JoplinInternalLink]) -> str:
    """Rewrite a WizNote body so that its internal links are Joplin links.

    Every link with an ``outertext`` is replaced in place. All attachment
    links are additionally listed at the bottom of the body.
    """
    attachment_links: list[JoplinInternalLink] = []
    for jil in internal_links:
        # Every attachment also gets a link at the bottom of the body.
        if jil.link_type == 'open_attachment':
            attachment_links.append(jil)
        if not jil.outertext:
            continue
        # Swap the original link text for the Joplin form.
        body = body.replace(jil.outertext, gen_ilstr(is_markdown, jil))
    # Markdown notes are passed through get_text.
    if is_markdown:
        body = get_text(body)
    if attachment_links:
        body += gen_end_ilstr(is_markdown, attachment_links)
    return body


--------------------------------------------------------------------------------
/w2j/wiz.py:
--------------------------------------------------------------------------------
  1 | ##############################
  2 | # w2j.wiz
  3 | # 处理为知笔记相关
  4 | ##############################
  5 | 
  6 | from os import PathLike
  7 | from typing import Any, Optional
  8 | from pathlib import Path
  9 | import sqlite3
 10 | from zipfile import ZipFile, BadZipFile
 11 | 
 12 | from w2j import logger, work_dir as default_work_dir
 13 | from w2j.parser import parse_wiz_html, tots, WizInternalLink, WizImage
 14 | 
 15 | 
 16 | class WizAttachment(object):
 17 |     """ 为知笔记附件
 18 | 
 19 |     在为知笔记中，附件属于一种资源，拥有自己的 guid
 20 |     """
 21 |     # 附件的 guid
 22 |     guid: str = None
 23 | 
 24 |     # 附件所属的文档 guid
 25 |     doc_guid: str = None
 26 | 
 27 |     # 附件的名称，一般是文件名
 28 |     name: str = None
 29 | 
 30 |     # 附件在硬盘上的文件名，格式为 {guid}name
 31 |     file_name: str = None
 32 | 
 33 |     # 附件的修改时间
 34 |     modified: int = 0
 35 | 
 36 |     # 附件的文件名所在地
 37 |     file: Path = None
 38 | 
 39 |     def __init__(self, guid: str, doc_guid: str, name: str, modified: str, attachments_dir: Path, check_file: bool = False) -> None:
 40 |         self.guid = guid
 41 |         self.doc_guid = doc_guid
 42 |         self.name = name
 43 |         self.modified = tots(modified)
 44 |         self.file_name =f'{{{self.guid}}}{self.name}'
 45 | 
 46 |         self.file = attachments_dir.joinpath(self.file_name)
 47 |         if check_file:
 48 |             self.check_file()
 49 |     
 50 |     def check_file(self):
 51 |         """ 检测附件是否存在
 52 |         """
 53 |         if not self.file.exists():
 54 |             raise FileNotFoundError(f'找不到文件 {self.file_name}')
 55 | 
 56 |     def __repr__(self) -> str:
 57 |         return f'<WizAttachment {self.guid}, {self.name}, {self.modified}>'
 58 | 
 59 | 
 60 | class WizTag(object):
 61 |     """ 为知笔记 TAG
 62 |     """
 63 |     # tag 的 guid
 64 |     guid: str = None
 65 | 
 66 |     name: str = None
 67 | 
 68 |     modified: int = 0
 69 | 
 70 |     def __init__(self, guid, name, modified) -> None:
 71 |         self.guid = guid
 72 |         self.name = name
 73 |         self.modified = tots(modified)
 74 | 
 75 |     def __repr__(self) -> str:
 76 |         return f'<WizTag {self.guid}, {self.name}, {self.modified}>'
 77 | 
 78 | 
 79 | class WizDocument(object):
 80 |     """ 为知笔记文档
 81 |     """
 82 |     # 文档的 guid
 83 |     guid: str = None
 84 |     title: str = None
 85 | 
 86 |     # 文件夹，为知笔记的文件夹就是一个用 / 分隔的字符串
 87 |     location: str = None
 88 | 
 89 |     # 保存一个 Folder 对象，这个对象在 Adapter 进行 Location 到 Folder 的转换之后才会填充
 90 |     folder: Any = None
 91 |     
 92 |     url: str = None # 如果文档是采集的，则这个地址为文档的采集url
 93 | 
 94 |     created: int = 0
 95 | 
 96 |     modified: int = 0
 97 | 
 98 |     # 从数据库中读取的附件数量，如果大于 0 说明这个文档有附件
 99 |     attachment_count: int = 0
100 | 
101 |     # 文档压缩包
102 |     note_file: Path = None
103 | 
104 |     # 文档压缩包解压到的路径
105 |     note_extract_dir: Path = None
106 | 
107 |     # 文档解压到的主文件夹
108 |     documents_dir: Path
109 | 
110 |     # 文档正文
111 |     body: str = None
112 | 
113 |     # markdown，默认为 markdown
114 |     is_markdown: bool = True
115 | 
116 |     # 文档的标签
117 |     tags: list[WizTag] = []
118 | 
119 |     # 文档的附件
120 |     attachments: list[WizAttachment] = []
121 | 
122 |     # 包含在为知笔记文档中的图像文件，需要在文档正文中使用正则提取
123 |     images: list[WizImage] = []
124 | 
125 |     # 包含在为知笔记文档中的内部链接，需要在文档征文中使用正则提取
126 |     internal_links: list[WizInternalLink] = []
127 | 
128 |     def __init__(self, guid: str, title: str, location: str, url: str, created: str, modified: str, attachment_count: int, notes_dir: Path, documents_dir: Path, check_file: bool = False) -> None:
129 |         self.guid = guid
130 |         self.location = location
131 |         self.url = url
132 |         self.created = tots(created)
133 |         self.modified = tots(modified)
134 |         self.attachment_count = attachment_count
135 | 
136 |         self.documents_dir = documents_dir
137 | 
138 |         self.is_markdown = title.endswith('.md')
139 |         if self.is_markdown and len(title) > 3:
140 |             self.title = title[:-3]
141 |         else:
142 |             self.title = title
143 | 
144 |         self.note_file = notes_dir.joinpath(f'{{{self.guid}}}')
145 |         if check_file:
146 |             self.check_note_file()
147 | 
148 |     def check_note_file(self):
149 |         if self.note_file is None or not self.note_file.exists():
150 |             raise FileNotFoundError(f'找不到 note 文件 {self.note_file}！')
151 | 
152 |     def resolve_attachments(self, attachments: list[WizAttachment]) -> None:
153 |         self.attachments = attachments
154 |         if len(self.attachments) != self.attachment_count:
155 |             raise ValueError(f'附件数量不匹配 {len(self.attachments)} != {self.attachment_count}！')
156 |         # 检测所有附件文件是否存在
157 |         try:
158 |             for attach in self.attachments:
159 |                 attach.check_file()
160 |         except FileNotFoundError as e:
161 |             msg = f'{e!s}，请检查文档 {self.title}'
162 |             raise FileNotFoundError(msg)
163 |         
164 |     def resolve_tags(self, tags: list[WizTag]) -> None:
165 |         self.tags = tags
166 | 
167 |     def _extract_zip(self) -> None:
168 |         """ 解压缩当前文档的 zip 文件到 work_dir，以 guid 为子文件夹名称
169 |         """
170 |         self.note_extract_dir = self.documents_dir.joinpath(self.guid)
171 |         # 如果目标文件夹已经存在，就不解压了
172 |         if self.note_extract_dir.exists():
173 |             # logger.info(f'{self.note_extract_dir!s} |{self.title}| 已经存在，跳过。')
174 |             return
175 |         try:
176 |             zip_file = ZipFile(self.note_file)
177 |             zip_file.extractall(self.note_extract_dir)
178 |         except BadZipFile as e:
179 |             msg = f'ZIP 文件错误，可能是需要密码。 {self.note_file!s} |{self.title}|'
180 |             raise BadZipFile(msg)
181 |             # logger.info(msg)
182 | 
183 |     def _parse_wiz_note(self) -> None:
184 |         """ 解析 index.html 文件
185 |         """
186 |         if self.note_extract_dir is None:
187 |             raise FileNotFoundError(f'请先解压缩文档 {self.note_file!s} |{self.title}|')
188 | 
189 |         self.body, self.internal_links, self.images = parse_wiz_html(self.note_extract_dir, self.title)
190 | 
191 |     def resolve_body(self) -> None:
192 |         """ 解压文档压缩包，解析文档正文中的图像文件，将其转换为 WizImage
193 |         将正文存入 body
194 |         """
195 |         self.check_note_file()
196 |         self._extract_zip()
197 |         self._parse_wiz_note()
198 | 
199 |     def resolve(self, attachments: list[WizAttachment], tags: list[WizTag]) -> None:
200 |         self.resolve_attachments(attachments)
201 |         self.resolve_tags(tags)
202 |         self.resolve_body()
203 | 
204 |     def __repr__(self):
205 |         return f'<w2j.wiz.WizDocument {self.note_file.resolve()} |{self.title}| tags: {len(self.tags)} attachments: {len(self.attachments)} markdown: {self.is_markdown}>'
206 | 
207 | 
class DataDir(object):
    """Paths inside a WizNote ``data`` folder, plus queries on its index.db."""
    def __init__(self, data_dir: Path):
        """Locate the sub-folders and databases and require all of them to exist.

        :raises FileNotFoundError: when ``attachments/``, ``notes/``,
            ``index.db`` or ``wizthumb.db`` is missing
        """
        self.data_dir = data_dir

        self.attachments_dir = self.data_dir.joinpath('attachments/')
        if not self.attachments_dir.is_dir():
            raise FileNotFoundError(f'找不到文件夹 {self.attachments_dir.resolve()}！')

        self.notes_dir = self.data_dir.joinpath('notes/')
        if not self.notes_dir.is_dir():
            raise FileNotFoundError(f'找不到文件夹 {self.notes_dir.resolve()}！')

        self.index_db = self.data_dir.joinpath('index.db')
        if not self.index_db.exists():
            raise FileNotFoundError(f'找不到数据库 {self.index_db.resolve()}！')

        self.wizthumb_db = self.data_dir.joinpath('wizthumb.db')
        if not self.wizthumb_db.exists():
            raise FileNotFoundError(f'找不到数据库 {self.wizthumb_db.resolve()}！')

    def _fetch_all(self, sql: str) -> list:
        """Run ``sql`` against index.db and return every row.

        The connection is closed even when the query raises.
        """
        conn = sqlite3.connect(self.index_db)
        try:
            return conn.cursor().execute(sql).fetchall()
        finally:
            conn.close()

    def _get_one_document(self, guid: str) -> tuple[Optional[tuple], list, list]:
        """Fetch one document row together with its attachment and tag rows.

        :returns: document row (None when the guid is unknown), attachment
            rows, tag rows; both lists are empty when the document is unknown
        """
        conn = sqlite3.connect(self.index_db)
        try:
            cur = conn.cursor()

            sql = '''SELECT
        DOCUMENT_GUID, DOCUMENT_TITLE, DOCUMENT_LOCATION, DOCUMENT_URL, DT_CREATED, DT_MODIFIED, DOCUMENT_ATTACHEMENT_COUNT
        FROM WIZ_DOCUMENT
        WHERE DOCUMENT_GUID = ?
        '''
            cur.execute(sql, (guid, ))
            document_row = cur.fetchone()
            attachment_rows  = []
            tag_rows = []

            # The related rows are only looked up for a known document.
            if document_row:
                sql = '''SELECT
            ATTACHMENT_GUID, DOCUMENT_GUID, ATTACHMENT_NAME, DT_INFO_MODIFIED
            FROM WIZ_DOCUMENT_ATTACHMENT
            WHERE DOCUMENT_GUID = ?
            '''
                cur.execute(sql, (guid, ))
                attachment_rows = cur.fetchall()

                sql = '''SELECT
            WIZ_TAG.TAG_GUID, WIZ_TAG.TAG_NAME, WIZ_TAG.DT_MODIFIED
            FROM WIZ_DOCUMENT_TAG INNER JOIN WIZ_TAG
            ON WIZ_DOCUMENT_TAG.TAG_GUID = WIZ_TAG.TAG_GUID
            WHERE WIZ_DOCUMENT_TAG.DOCUMENT_GUID = ?
            '''
                cur.execute(sql, (guid, ))
                tag_rows = cur.fetchall()

            return document_row, attachment_rows, tag_rows
        finally:
            # Close the connection even when one of the queries raises.
            conn.close()

    def _get_all_document(self):
        """Return every row of WIZ_DOCUMENT."""
        return self._fetch_all('SELECT DOCUMENT_GUID, DOCUMENT_TITLE, DOCUMENT_LOCATION, DOCUMENT_URL, DT_CREATED, DT_MODIFIED, DOCUMENT_ATTACHEMENT_COUNT FROM WIZ_DOCUMENT')

    def _get_all_attachment(self) -> list:
        """Return every row of WIZ_DOCUMENT_ATTACHMENT."""
        return self._fetch_all('SELECT ATTACHMENT_GUID, DOCUMENT_GUID, ATTACHMENT_NAME, DT_INFO_MODIFIED FROM WIZ_DOCUMENT_ATTACHMENT')

    def _get_all_tag(self) -> list:
        """Return every row of WIZ_TAG."""
        return self._fetch_all('SELECT TAG_GUID, TAG_NAME, DT_MODIFIED FROM WIZ_TAG')

    def _get_all_document_tag(self) -> list:
        """Return every row of WIZ_DOCUMENT_TAG."""
        return self._fetch_all('SELECT DOCUMENT_GUID, TAG_GUID FROM WIZ_DOCUMENT_TAG')

    def __repr__(self):
        """Return a debug string with the resolved data folder."""
        return f'<w2j.wiz.DataDir {self.data_dir.resolve()}>'
307 | 
308 | 
309 | class WizStorage(object):
310 |     """ 保存所有为知笔记的数据
311 |     """
312 |     # 工作文件夹所在地址，临时文件会置于工作文件夹中
313 |     work_dir: Path
314 | 
315 |     # 为知笔记文档解压到这个文件夹
316 |     documents_dir: Path
317 | 
318 |     wiznote_dir: Path
319 |     user_id: str
320 |     user_dir: Path
321 |     group_dir: Path
322 | 
323 |     # 是否为 group 仓库
324 |     is_group_storage: bool = False
325 | 
326 |     data_dir: DataDir
327 | 
328 |     # 所有的 TAG
329 |     tags: list[WizTag] = []
330 |     # 键名为文档的 guid，键值为该该文档中的 Tag
331 |     tags_in_document: dict[str, list[WizTag]] = {}
332 | 
333 |     # 所有的附件
334 |     attachments: list[WizAttachment] = []
335 |     # 键名为文档的 guid，键值为该该文档中的 Attachment
336 |     attachments_in_document: dict[str, list[WizAttachment]] = {}
337 | 
338 |     # 所有的图片
339 |     images: list[WizImage] = []
340 |     # 键名为文档的 guid，键值为该该文档中的 Image
341 |     images_in_document: dict[str, list[WizImage]] = {}
342 | 
343 |     # 所有的文档
344 |     documents: list[WizDocument] = []
345 | 
346 |     def __init__(self, user_id: str, wiznote_dir: Path, is_group_storage: bool = False, work_dir: Path = None):
347 |         """ 定义位置笔记文件夹
348 |         :param user_id: 帐号邮箱
349 |         :param winznote_dir: 帐号所在文件夹
350 |         :param work_dir: 工作文件夹，用于解压文件等操作，若不提供则使用临时文件夹
351 |         """
352 |         self.work_dir = work_dir or default_work_dir
353 | 
354 |         # 创建专门解压缩位置文档的文件夹
355 |         self.documents_dir = self.work_dir.joinpath('documents')
356 |         if not self.documents_dir.exists():
357 |             self.documents_dir.mkdir(parents=True)
358 | 
359 |         self.wiznote_dir = wiznote_dir
360 |         self.user_id = user_id
361 |         self.user_dir = self.wiznote_dir.joinpath(user_id)
362 |         self.group_dir = self.user_dir.joinpath('group')
363 |         self.is_group_storage = is_group_storage
364 | 
365 |         # data 的根文件夹
366 |         root_data_dir = DataDir(self.user_dir.joinpath('data/'))
367 |         # 获取 group 仓库，位于 data 根文件夹之下
368 |         if self.is_group_storage:
369 |             biz_guid = self._get_biz_guid(root_data_dir.index_db)
370 |             self.data_dir = DataDir(self.group_dir.joinpath(biz_guid))
371 |         else:
372 |             self.data_dir = root_data_dir
373 | 
374 |     def _get_biz_guid(self, index_db: Path) -> str:
375 |         """ 通过一次查询获取 user 的 guid
376 |         """
377 |         conn = sqlite3.connect(index_db)
378 |         cur = conn.cursor()
379 |         cur.execute('SELECT BIZ_GUID FROM WIZ_USER where USER_ID=?', (self.user_id,))
380 |         row = cur.fetchone()
381 |         conn.close()
382 |         if row is not None:
383 |             return row[0]
384 |         return None
385 | 
386 |     def _build_tags(self) -> tuple[list[WizTag], dict[str, list[WizTag]]]:
387 |         """ 根据数据库内容构建所有的 tag 列表
388 |         创建一个 dict ，键名为文档 guid，键值为该文档中的 Tag 列表
389 |         返回这两个列表
390 |         """
391 |         tag_rows = self.data_dir._get_all_tag()
392 |         tags: list[WizTag] = []
393 |         # 创建一个临时的 dict 用于查找 tag guid
394 |         key_tags: dict[str, WizTag] = {}
395 |         for row in tag_rows:
396 |             tag = WizTag(*row)
397 |             tags.append(tag)
398 |             key_tags[tag.guid] = tag
399 | 
400 |         tag_in_doc_rows = self.data_dir._get_all_document_tag()
401 |         tags_dict: dict[str, list[WizTag]] = {}
402 | 
403 |         for row in tag_in_doc_rows:
404 |             doc_guid = row[0]
405 |             tag_guid = row[1]
406 | 
407 |             if tags_dict.get(doc_guid) is None:
408 |                 tags_dict[doc_guid] = []
409 | 
410 |             # 如果在 key_tags 中找不到 tag_guid 会报错，此时就需要检查为知笔记中的 tag 设置了
411 |             tags_dict[doc_guid].append(key_tags[tag_guid])
412 |         return tags, tags_dict
413 | 
414 |     def _build_attachments(self) -> tuple[list[WizAttachment], dict[str, list[WizAttachment]]]:
415 |         """ 根据数据库内容构建所有的 attachemnt 列表
416 |         创建一个 dict ，键名为文档 guid，键值为该文档中的 attachment 列表
417 |         返回这两个列表
418 |         """
419 |         rows = self.data_dir._get_all_attachment()
420 |         attachments: list[WizAttachment] = []
421 | 
422 |         attachments_in_document: dict[str, list[WizAttachment]] = {}
423 | 
424 |         for row in rows:
425 |             attachment = WizAttachment(*row, self.data_dir.attachments_dir)
426 |             attachments.append(attachment)
427 |             if attachments_in_document.get(attachment.doc_guid) is None:
428 |                 attachments_in_document[attachment.doc_guid] = []
429 |             attachments_in_document[attachment.doc_guid].append(attachment)
430 |         return attachments, attachments_in_document
431 | 
432 |     def build_documents(self) -> list[WizDocument]:
433 |         """ 根据数据库内容构建所有的 document 列表
434 |         """
435 |         rows = self.data_dir._get_all_document()
436 | 
437 |         attachments, attachments_in_doc = self._build_attachments()
438 |         tags, tags_in_doc = self._build_tags()
439 | 
440 |         self.attachments = attachments
441 |         self.attachments_in_document = attachments_in_doc
442 |         self.tags = tags
443 |         self.tags_in_document = tags_in_doc
444 | 
445 |         documents: list[WizDocument] = []
446 |         for row in rows:
447 |             document = WizDocument(*row, self.data_dir.notes_dir, self.documents_dir, check_file=True)
448 |             document.resolve(
449 |                 self.attachments_in_document.get(document.guid, []),
450 |                 self.tags_in_document.get(document.guid, [])
451 |             )
452 |             documents.append(document)
453 |         return documents
454 | 
455 |     def build_document(self, guid: str) -> WizDocument:
456 |         """ 构建一个 document
457 |         """
458 |         document_row, attachment_rows, tag_rows = self.data_dir._get_one_document(guid)
459 |         if not document_row:
460 |             raise ValueError(f'找不到文档 {guid}！')
461 | 
462 |         attachments: list[WizAttachment] = []
463 |         for row in attachment_rows:
464 |             attachments.append(WizAttachment(*row, self.data_dir.attachments_dir, check_file=False))
465 |         
466 |         tags: list[WizTag] = []
467 |         for row in tag_rows:
468 |             tags.append(WizTag(*row))
469 | 
470 |         document = WizDocument(*document_row, self.data_dir.notes_dir, self.documents_dir, check_file=True)
471 |         document.resolve(attachments, tags)
472 |         return document
473 | 
474 |     def resolve(self) -> None:
475 |         """ 解析所有文档并保存相关数据
476 |         调用此方法后，所有数据安全并可用
477 |         """
478 |         self.documents = self.build_documents()
479 |         
480 |     def clear(self) -> None:
481 |         """ 删除解压的临时文件夹
482 |         """
483 |         self.documents_dir.unlink()
484 | 
485 | 


--------------------------------------------------------------------------------
/wiznoteformac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zrong/wiz2joplin/0ce8bf9a867171176c28f199addbef95fe8c6b96/wiznoteformac.png


--------------------------------------------------------------------------------