├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt ├── setup.py ├── tests ├── __init__.py ├── conftest.py ├── test_adapter.py ├── test_joplin.py └── test_wiz.py ├── w2j ├── __init__.py ├── __main__.py ├── adapter.py ├── joplin.py ├── parser.py └── wiz.py └── wiznoteformac.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | output/ 131 | .vscode/ 132 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. 
You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. 
The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 
91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 
122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 
155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 
186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. 
This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Migrate from WizNote to Joplin. 2 | 3 | ## !!!CAUTION!!!! 4 | 5 | wiz2joplin has only been tested with wizNote for Mac 2.8.7. AFAIK, the folder structure of the macOS and Windows versions of wizNote may be different. 6 | 7 | If you can provide a pull request for the Windows version of wizNote, I believe it will be helpful to many people.
 8 | 9 | ## Dependency 10 | 11 | - Python 3.9 12 | - macOS Catalina or above 13 | - wizNote for Mac 2.8.7 (2020.8.20 10:28) 14 | - ![wiznote for macOS](wiznoteformac.png) 15 | 16 | ## Installation 17 | 18 | To install this tool, you can use pip: 19 | 20 | ``` 21 | python -m venv ~/w2j/venv 22 | source ~/w2j/venv/bin/activate 23 | pip install w2j 24 | ``` 25 | 26 | Alternatively, you can install the package using the bundled setup script: 27 | 28 | ``` 29 | python -m venv ~/w2j/venv 30 | source ~/w2j/venv/bin/activate 31 | python setup.py install 32 | ``` 33 | 34 | ## Usage 35 | 36 | If your WizNote user id is `youremail@yourdomain.com`, the token in Joplin Web Clipper is `aa630825022a340ecbe5d3e2f25e5f6a`, and Joplin runs on the same computer, you can use wiz2joplin as follows. 37 | 38 | Convert all documents from WizNote to Joplin: 39 | 40 | ``` shell 41 | w2j -o ~/w2j -w ~/.wiznote -u youremail@yourdomain.com -t aa630825022a340ecbe5d3e2f25e5f6a -a 42 | ``` 43 | 44 | Convert the location `/My Notes/reading/` and all of its child locations from WizNote to Joplin: 45 | 46 | ``` shell 47 | w2j -o ~/w2j -w ~/.wiznote -u youremail@yourdomain.com -t aa630825022a340ecbe5d3e2f25e5f6a -l '/My Notes/reading/' -r 48 | 49 | ``` 50 | 51 | Use `w2j --help` to show usage for w2j: 52 | 53 | ``` 54 | usage: w2j [-h] --output OUTPUT --wiz-dir WIZNOTE_DIR --wiz-user 55 | WIZNOTE_USER_ID --joplin-token JOPLIN_TOKEN 56 | [--joplin-host JOPLIN_HOST] [--joplin-port JOPLIN_PORT] 57 | [--location LOCATION] [--location-children] [--all] 58 | 59 | Migrate from WizNote to Joplin. 60 | 61 | optional arguments: 62 | -h, --help show this help message and exit 63 | --output OUTPUT, -o OUTPUT 64 | The output dir for unziped WizNote file and log file. 65 | e.g. ~/wiz2joplin_output or 66 | C:\Users\zrong\wiz2joplin_output 67 | --wiz-dir WIZNOTE_DIR, -w WIZNOTE_DIR 68 | Set the data dir of WizNote.
e.g ~/.wiznote or 69 | C:\Program Files\WizNote 70 | --wiz-user WIZNOTE_USER_ID, -u WIZNOTE_USER_ID 71 | Set your user id(login email) of WizNote. 72 | --joplin-token JOPLIN_TOKEN, -t JOPLIN_TOKEN 73 | Set the authorization token to access Joplin Web 74 | Clipper Service. 75 | --joplin-host JOPLIN_HOST, -n JOPLIN_HOST 76 | Set the host of your Joplin Web Clipper Service, 77 | default is 127.0.0.1 78 | --joplin-port JOPLIN_PORT, -p JOPLIN_PORT 79 | Set the port of your Joplin Web Clipper Service, 80 | default is 41184 81 | --location LOCATION, -l LOCATION 82 | Convert the location of WizNote, e.g. /My Notes/. If 83 | you use the --all parameter, then skip --location 84 | parameter. 85 | --location-children, -r 86 | Use with --location parameter, convert all children 87 | location of --location. 88 | --all, -a Convert all documents of your WizNote. 89 | ``` 90 | 91 | ## Log file 92 | 93 | Please read log file `w2j.log` under --output directory to check the conversion states. 94 | 95 | ## 源码分析相关文章 96 | 97 | - [从 WizNote 为知笔记到 Joplin(上)](https://blog.zengrong.net/post/wiznote2joplin1/) 98 | - [从 WizNote 为知笔记到 Joplin(下)](https://blog.zengrong.net/post/wiznote2joplin2/) 99 | - [WizNote 为知笔记 macOS 本地文件夹分析](https://blog.zengrong.net/post/analysis-of-wiznote/) 100 | - [使用腾讯云对象存储(COS)实现Joplin同步](https://blog.zengrong.net/post/joplin-sync-use-cos/) 101 | - [配置 Joplin Server 实现同步](https://blog.zengrong.net/post/joplin-server-config/) 102 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | idna==2.10 2 | chardet 3 | httpx 4 | inscriptis -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | from setuptools import setup, find_packages 4 | 5 | here = Path(__file__).parent 6 | 7 | def 
read(*parts): 8 | """ 读取一个文件并返回内容 9 | """ 10 | return here.joinpath(*parts).read_text(encoding='utf8') 11 | 12 | def find_version(*file_paths): 13 | """ 从 __init__.py 的 __version__ 变量中提取版本号 14 | """ 15 | version_file = read(*file_paths) 16 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", 17 | version_file, re.M) 18 | if version_match: 19 | return version_match.group(1) 20 | raise RuntimeError("Unable to find version string.") 21 | 22 | def find_requires(*file_paths): 23 | """ 将提供的 requirements.txt 按行转换成 list 24 | """ 25 | require_file = read(*file_paths) 26 | return require_file.splitlines() 27 | 28 | def static_requires(): 29 | return ['idna==2.10', 'chardet', 'httpx', 'inscriptis'] 30 | 31 | classifiers = [ 32 | 'Programming Language :: Python :: 3.9', 33 | 'Development Status :: 4 - Beta', 34 | 'Environment :: Console', 35 | 'Topic :: System :: Shells', 36 | 'Topic :: Utilities', 37 | 'Topic :: Text Processing :: Markup :: HTML', 38 | 'Topic :: Text Processing :: Markup :: Markdown', 39 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 40 | ] 41 | 42 | # 使用 flask 的扩展 43 | entry_points = { 44 | 'console_scripts': [ 45 | 'w2j=w2j:main' 46 | ] 47 | } 48 | 49 | package_data = { 50 | '': ['requirements.txt'] 51 | } 52 | 53 | 54 | setup( 55 | python_requires='>=3.9, <4', 56 | name = "w2j", 57 | version=find_version('w2j', '__init__.py'), 58 | author = "zrong", 59 | author_email = "zrongzrong@gmail.com", 60 | url = "https://github.com/zrong/wiz2joplin", 61 | description = "A tool for migrating from WizNote to Joplin.", 62 | long_description=read('README.md'), 63 | long_description_content_type='text/markdown', 64 | license = "GPLv3", 65 | keywords = "development zrong wiznote joplin", 66 | packages = find_packages(exclude=['test*', 'output', 'venv']), 67 | install_requires=static_requires(), 68 | entry_points=entry_points, 69 | include_package_data = True, 70 | zip_safe=False, 71 | classifiers = classifiers, 72 | 
package_data=package_data 73 | ) 74 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zrong/wiz2joplin/0ce8bf9a867171176c28f199addbef95fe8c6b96/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from os import environ 3 | 4 | import pytest 5 | 6 | from w2j import wiz, joplin 7 | from w2j.adapter import Adapter 8 | 9 | 10 | @pytest.fixture(scope='session') 11 | def work_dir(): 12 | work_dir = Path(__file__).parent.parent.joinpath('output/') 13 | if not work_dir.exists(): 14 | work_dir.mkdir() 15 | return work_dir 16 | 17 | 18 | @pytest.fixture(scope='session') 19 | def ws(wiz_user_id: str, work_dir: Path): 20 | """ 提供一个为知笔记存储对象 21 | """ 22 | wiznote_dir = Path('~/.wiznote').expanduser() 23 | ws = wiz.WizStorage(wiz_user_id, wiznote_dir, is_group_storage=False, work_dir=work_dir) 24 | return ws 25 | 26 | 27 | @pytest.fixture(scope='session') 28 | def wiz_user_id(): 29 | return environ.get('W2J_USER_ID') 30 | 31 | 32 | @pytest.fixture(scope='session') 33 | def jda(): 34 | return joplin.JoplinDataAPI( 35 | token='d3098caff3d80561bf915c15cf3f70956c3550fc13e67bee78f74b1a6b2d2632dff10667668cc6682df27d493aa35492b68fa3f642f738fd80547acf571dc17c' 36 | ) 37 | 38 | @pytest.fixture(scope='session') 39 | def js(): 40 | joplin_dir = Path('~/.config/joplin-desktop').expanduser() 41 | return joplin.JoplinStorage(joplin_dir) 42 | 43 | 44 | @pytest.fixture(scope='session') 45 | def adapter(ws: wiz.WizStorage, jda: joplin.JoplinDataAPI, work_dir: Path): 46 | return Adapter(ws, jda, work_dir) 47 | 48 | 49 | @pytest.fixture(scope='session') 50 | def wsg(wiz_user_id: str, work_dir: Path): 51 | """ 提供一个为知笔记 Group 存储对象 52 | """ 53 | wiznote_dir = 
Path('~/.wiznote').expanduser() 54 | wsg = wiz.WizStorage(wiz_user_id, wiznote_dir, is_group_storage=True, work_dir=work_dir) 55 | return wsg 56 | 57 | @pytest.fixture(scope='session') 58 | def adapter_group(wsg: wiz.WizStorage, jda: joplin.JoplinDataAPI, work_dir: Path): 59 | return Adapter(wsg, jda, work_dir) -------------------------------------------------------------------------------- /tests/test_adapter.py: -------------------------------------------------------------------------------- 1 | from w2j.wiz import WizStorage 2 | from w2j.joplin import JoplinStorage 3 | import pytest 4 | 5 | from w2j.adapter import Adapter 6 | from w2j.parser import tojoplinid 7 | 8 | 9 | @pytest.mark.skip 10 | def test_sync_folders(adapter: Adapter): 11 | adapter.sync_folders() 12 | 13 | @pytest.mark.skip 14 | def test_sync_tags(adapter: Adapter): 15 | adapter.sync_tags() 16 | 17 | 18 | @pytest.mark.skip 19 | def test_convert_body(ws: WizStorage, adapter: Adapter): 20 | """ 测试转换一个文档到 Jopoin 21 | """ 22 | adapter.sync_folders() 23 | adapter.sync_tags() 24 | # 2021-01-4weeks.md 25 | # guid = '7fdcba42-4d6e-4c1d-a7d8-ffb97a5cca2f' 26 | # 怎样理性愉快地度过这一生?世界首富巴菲特的合伙人芒格告诉你! 
27 | # guid = 'b500bc97-e6d8-4038-be8d-cd1f182e880c' 28 | 29 | guid = '7594790e-b9ab-463a-bc38-7cc546d76513' 30 | wd = ws.build_document('01607fd4-2c63-b6a2-0b87-294529a3f645') 31 | jn = adapter._sync_note(wd) 32 | assert tojoplinid(guid) == jn.id 33 | 34 | 35 | def test_convert_location(adapter: Adapter): 36 | """ 测试转换一个为知笔记目录 到 Joplin,支持同时转换子目录 37 | """ 38 | location = '/微信收藏/' 39 | adapter.sync_note_by_location(location, True) 40 | 41 | 42 | @pytest.mark.skip 43 | def test_convert_all_group(adapter_group: Adapter): 44 | """ 测试为知笔记的 group 对象 45 | """ 46 | location = '/collection/技术/' 47 | adapter_group.sync_all() 48 | 49 | 50 | @pytest.mark.skip 51 | def test_update_joplin_time(adapter_group: Adapter, js: JoplinStorage): 52 | times = [ 53 | {'id': tojoplinid(wd.guid), 'created_time': wd.created, 'updated_time': wd.modified or wd.created} 54 | for wd in adapter_group.ws.documents] 55 | js.update_time(times) -------------------------------------------------------------------------------- /tests/test_joplin.py: -------------------------------------------------------------------------------- 1 | from os import name 2 | from pathlib import Path 3 | from w2j.joplin import JoplinDataAPI 4 | import pytest 5 | 6 | def test_ping(jda: JoplinDataAPI): 7 | assert jda.ping() 8 | 9 | def test_get_folders(jda: JoplinDataAPI): 10 | folders, has_more, next_page = jda.get_folders(limit=1) 11 | assert has_more == True 12 | # 获取所有的 Folder 13 | folders, has_more, next_page = jda.get_folders(limit=1, page=0) 14 | assert has_more == False 15 | 16 | @pytest.mark.skip 17 | def test_get_folder(jda: JoplinDataAPI): 18 | # 临时记录 Folder 19 | # test_id = 'f02beb3e93f4456ea4032613b9a9575d' 20 | test_id = '467852588872421b939824efdbc26266' 21 | folder = jda.get_folder(test_id) 22 | assert folder.id == test_id 23 | 24 | 25 | @pytest.mark.skip 26 | def test_post_folders(jda: JoplinDataAPI): 27 | folder = jda.post_folders(title='创新新的folder') 28 | print(folder) 29 | 
-------------------------------------------------------------------------------- /tests/test_wiz.py: -------------------------------------------------------------------------------- 1 | from w2j.wiz import WizStorage 2 | from pathlib import Path 3 | import pytest 4 | 5 | 6 | def test_datadir(wiz_user_id: str, ws: WizStorage): 7 | wiznote_dir = Path('~/.wiznote').expanduser() 8 | assert str(ws.data_dir.data_dir.resolve()) == str(wiznote_dir.joinpath(wiz_user_id).joinpath('data').resolve()) 9 | 10 | 11 | @pytest.mark.skip 12 | def test_build_tags(ws: WizStorage): 13 | """ 测试 tag 14 | """ 15 | tags, tags_dict = ws._build_tags() 16 | assert len(tags_dict.keys()) > 0 17 | 18 | 19 | @pytest.mark.skip 20 | def test_build_attachments(ws: WizStorage): 21 | """ 测试附件 22 | """ 23 | attachments, attachments_in_document = ws._build_attachments() 24 | # 附件总量一般会大于包含附件文档的数量,因为许多文档包含不止一个附件 25 | assert len(attachments) > len(attachments_in_document) 26 | 27 | 28 | @pytest.mark.skip 29 | def test_build_documents(ws: WizStorage): 30 | """ 测试文档 31 | """ 32 | documents = ws.build_documents() 33 | document_rows = ws.data_dir._get_all_document() 34 | assert len(documents) == len(document_rows) 35 | 36 | 37 | @pytest.mark.skip 38 | def test_build_document(ws: WizStorage): 39 | """ 测试获取一个文档 40 | """ 41 | # 没有 attachment,有一个 tag 42 | one_tag = '49c21d80-dc3f-47d6-b37b-7b5602133600' 43 | 44 | # Flash向量-8-球和角,有一个 attachment 45 | two_open_attachment = '44fba993-8f62-4eef-a7db-5f8b332d95d3' 46 | 47 | # 2021-02-1weeks.md 48 | four_open_document = '32321691-f842-4cf2-8a1a-e9f3f1212a42' 49 | 50 | # linux 技巧:使用 screen 管理你的远程会话 51 | more_images = 'f38c347c-17cb-4342-a063-861876f70660' 52 | 53 | # 超过 10 个图像 54 | more_images2 = 'cc18a030-7445-43ad-939a-ad9e264da8d7' 55 | document = ws.build_document(more_images) 56 | 57 | # document = ws.build_document(four_open_document) 58 | assert document.title == 'Flash向量-8-球和角' -------------------------------------------------------------------------------- 
/w2j/__init__.py: -------------------------------------------------------------------------------- 1 | ############################## 2 | # w2j = Wiznote to Joplin 3 | # 4 | # https://github.com/zrong/wiz2joplin 5 | ############################## 6 | 7 | import logging 8 | import sys 9 | from pathlib import Path 10 | import argparse 11 | 12 | __autho__ = 'zrong' 13 | __version__ = '0.4' 14 | 15 | work_dir = Path.cwd() 16 | logger = logging.Logger('w2j') 17 | log_file = work_dir.joinpath('w2j.log') 18 | log_handler = logging.FileHandler(log_file) 19 | log_handler.setFormatter(logging.Formatter('{asctime} - {funcName} - {message}', style='{')) 20 | # logger.addHandler(logging.StreamHandler(sys.stderr)) 21 | logger.addHandler(log_handler) 22 | 23 | 24 | parser = argparse.ArgumentParser('w2j', description='Migrate from WizNote to Joplin.') 25 | parser.add_argument('--output', '-o', type=str, metavar='OUTPUT', required=True, help='The output dir for unziped WizNote file and log file. e.g. ~/wiz2joplin_output or C:\\Users\\zrong\\wiz2joplin_output') 26 | parser.add_argument('--wiz-dir', '-w', type=str, metavar='WIZNOTE_DIR', required=True, help='Set the data dir of WizNote. 
e.g ~/.wiznote or C:\\Program Files\\WizNote') 27 | parser.add_argument('--wiz-user', '-u', type=str, metavar='WIZNOTE_USER_ID', required=True, help='Set your user id(login email) of WizNote.') 28 | parser.add_argument('--joplin-token', '-t', type=str, metavar='JOPLIN_TOKEN', required=True, help='Set the authorization token to access Joplin Web Clipper Service.') 29 | parser.add_argument('--joplin-host', '-n', type=str, metavar='JOPLIN_HOST', default='127.0.0.1', help='Set the host of your Joplin Web Clipper Service, default is 127.0.0.1') 30 | parser.add_argument('--joplin-port', '-p', type=int, metavar='JOPLIN_PORT', default=41184, help='Set the port of your Joplin Web Clipper Service, default is 41184') 31 | parser.add_argument('--location', '-l', type=str, metavar='LOCATION', help='Convert the location of WizNote, e.g. /My Notes/. If you use the --all parameter, then skip --location parameter.') 32 | parser.add_argument('--location-children', '-r', action='store_true', help='Use with --location parameter, convert all children location of --location.') 33 | parser.add_argument('--all', '-a', action='store_true', help='Convert all documents of your WizNote.') 34 | args = parser.parse_args() 35 | 36 | 37 | from . import wiz 38 | from . import joplin 39 | from . 
import adapter 40 | 41 | __all__ = ['wiz', 'joplin', 'adapter'] 42 | 43 | 44 | def main() -> None: 45 | if args.location is None and args.all == False: 46 | print('Please set --location to assign the location of WizNote, or use --all to convert all of the documents!') 47 | return 48 | wiznote_dir = Path(args.wiz_dir).expanduser() 49 | if not wiznote_dir.exists(): 50 | print(f'The wiznote directory {wiznote_dir} is not exists!') 51 | return 52 | output_dir = Path(args.output).expanduser() 53 | if not output_dir.exists(): 54 | output_dir.mkdir() 55 | logger.removeHandler(log_file) 56 | newlog_file = output_dir.joinpath('w2j.log') 57 | print(f'Please read [{newlog_file.resolve()}] to check the conversion states.') 58 | logger.addHandler(logging.FileHandler(newlog_file)) 59 | jda = joplin.JoplinDataAPI( 60 | host=args.joplin_host, 61 | port=args.joplin_port, 62 | token=args.joplin_token 63 | ) 64 | ws = wiz.WizStorage(args.wiz_user, wiznote_dir, is_group_storage=False, work_dir=output_dir) 65 | ad = adapter.Adapter(ws, jda, work_dir=output_dir) 66 | if args.all: 67 | ad.sync_all() 68 | else: 69 | ad.sync_note_by_location(args.location, args.location_children) 70 | -------------------------------------------------------------------------------- /w2j/__main__.py: -------------------------------------------------------------------------------- 1 | ############################## 2 | # w2j = Wiznote to Joplin 3 | # 4 | # python w2j 5 | # or 6 | # python -m w2j 7 | ############################## 8 | 9 | import sys 10 | import os 11 | 12 | if not __package__: 13 | path = os.path.join(os.path.dirname(__file__), os.pardir) 14 | sys.path.insert(0, path) 15 | 16 | import w2j 17 | w2j.main() -------------------------------------------------------------------------------- /w2j/adapter.py: -------------------------------------------------------------------------------- 1 | ############################## 2 | # w2j.adapter 3 | # 4 | # 适配器,将解析后的为知笔记对象装备成 joplin 笔记对象 5 | 
############################## 6 | 7 | from pathlib import Path 8 | from typing import Optional, Union 9 | import json 10 | import sqlite3 11 | 12 | from w2j import logger, work_dir as default_work_dir 13 | from w2j.wiz import WizDocument, WizAttachment, WizImage, WizInternalLink, WizTag, WizStorage 14 | from w2j.joplin import JoplinNote, JoplinFolder, JoplinResource, JoplinTag, JoplinDataAPI 15 | from w2j.parser import tojoplinid, towizid, convert_joplin_body, JoplinInternalLink 16 | 17 | 18 | class Location2Folder(object): 19 | """ 为知笔记的 Location 与 Joplin 的 Folder 之转换关系 20 | """ 21 | # 为知笔记的全路径名称(包含 / 的所有部分) 22 | location: str 23 | 24 | # 当前目录的名称 25 | title: str 26 | 27 | # 为知笔记的父全路径名称 28 | parent_location: str 29 | 30 | # 1/2/3 来表示当前 Folder 处于第几级,顶级为 level1 31 | level: int 32 | 33 | # Joplin Folder guid,只有创建之后才会存在 34 | id: str 35 | 36 | # 父 Joplin Folder guid 37 | parent_id: str 38 | 39 | def __init__(self, location: str, title: str = None, parent_location: str = None, level: int = 0, id: str = None, parent_id: str = None, **kwargs) -> None: 40 | self.location = location 41 | 42 | if title is None: 43 | # 去掉头尾的 / 后使用 / 分隔 44 | titles = location[1:-1].split('/') 45 | 46 | self.level = len(titles) 47 | # 最后一个是当前目录 48 | self.title = titles[-1] 49 | # 有父目录 50 | if self.level > 1: 51 | self.parent_location = '/' + '/'.join(titles[:-1]) + '/' 52 | else: 53 | self.parent_location = None 54 | else: 55 | self.title = title 56 | self.parent_location = parent_location 57 | self.level = level 58 | 59 | self.id = id 60 | self.parent_id = parent_id 61 | 62 | def __conform__(self, protocol) -> str: 63 | if protocol is sqlite3.PrepareProtocol: 64 | return f'{self.location};{self.title};{self.parent_location};{self.level};{self.id};{self.parent_id}' 65 | return '' 66 | 67 | def __repr__(self) -> str: 68 | return f'' 69 | 70 | 71 | class ConvertUtil(): 72 | """ 处理转换的中间过程 73 | """ 74 | # 转换过程中的专用数据库连接 75 | conn: sqlite3.Connection 76 | 77 | # lzf_db 的内容写入 json 文件中,避免每次都要重新生成 
Folder,造成重复 78 | db_file: Path 79 | 80 | CREATE_SQL: dict[str, str] = { 81 | # 保存 Location 和 Folder 的关系 82 | 'l2f': """CREATE TABLE l2f ( 83 | location TEXT NOT NULL, 84 | id TEXT, 85 | title TEXT NOT NULL, 86 | parent_location TEXT, 87 | parent_id TEXT, 88 | level INTEGER NOT NULL, 89 | PRIMARY KEY (location) 90 | );""", 91 | # 处理过的文档会保存在这里,在这个表中能找到的文档说明已经转换成功了 92 | 'note': """CREATE TABLE note ( 93 | note_id TEXT not NULL, 94 | title TEXT not NULL, 95 | joplin_folder TEXT NOT NULL, 96 | markup_language INTEGER NOT NULL, 97 | wiz_location TEXT NOT NULL, 98 | PRIMARY KEY (note_id) 99 | );""", 100 | # 处理过的资源保存在这里,包括 image 和 attachment 资源 101 | 'resource': """CREATE TABLE resource ( 102 | resource_id TEXT not NULL, 103 | title TEXT NOT NULL, 104 | filename TEXT NOT NULL, 105 | created_time INTEGER not NULL, 106 | resource_type INTEGER NOT NULL, 107 | PRIMARY KEY (resource_id) 108 | );""", 109 | # 保存为知笔记中的内链,也就是 resource 与 note 的关系,使用 文档 guid 和 连接目标 guid 同时作为主键。链接目标 guid 为 joplin 格式 110 | 'internal_link': """ 111 | CREATE TABLE internal_link ( 112 | note_id TEXT not NULL, 113 | resource_id TEXT not NULL, 114 | title TEXT not NULL, 115 | link_type TEXT NOT NULL, 116 | PRIMARY KEY (note_id, resource_id) 117 | ); 118 | CREATE INDEX idx_link_type ON internal_link (link_type); 119 | CREATE INDEX idx_resource_id ON internal_link (resource_id); 120 | """, 121 | # 保存为知笔记中的 tag 122 | 'tag': """ 123 | CREATE TABLE tag ( 124 | tag_id TEXT not NULL, 125 | title TEXT not NULL, 126 | created_time INTEGER not NULL, 127 | updated_time INTEGER not NULL, 128 | PRIMARY KEY (tag_id) 129 | ); 130 | CREATE UNIQUE INDEX idx_title ON tag (title); 131 | """, 132 | # 保存tag 与note 的关系 133 | 'note_tag': """CREATE TABLE note_tag ( 134 | note_id TEXT not NULL, 135 | tag_id TEXT not NULL, 136 | title TEXT not NULL, 137 | created_time INTEGER not NULL, 138 | PRIMARY KEY (note_id, tag_id) 139 | );""", 140 | } 141 | 142 | # 目录最大的级别 143 | folder_max_level: int = 0 144 | 145 | # 将为知笔记转换到 Joplin 目录的结果存储到 
dict 中 146 | l2f_cache: dict[str, Location2Folder] 147 | 148 | folders: dict[str, JoplinFolder] 149 | tag: dict[str, JoplinTag] 150 | notes: dict[str, JoplinNote] 151 | resources: dict[str, JoplinResource] 152 | internal_links: dict[str, JoplinInternalLink] 153 | 154 | def __init__(self, db_file: Path) -> None: 155 | self.db_file = db_file 156 | self.init_db() 157 | 158 | def init_db(self): 159 | """ 创建数据库 160 | """ 161 | self.conn = sqlite3.connect(self.db_file) 162 | test_table = "SELECT count(*) FROM sqlite_master WHERE type='table' AND name=?;" 163 | 164 | for table in ('l2f', 'note', 'resource', 'internal_link', 'tag', 'note_tag'): 165 | table_exists = self.conn.execute(test_table, (table, )).fetchone()[0] 166 | logger.info(f'表 {table} 是否存在: {table_exists}') 167 | if not table_exists: 168 | self.conn.executescript(self.CREATE_SQL[table]) 169 | 170 | def init_cache(self, documents: list[WizDocument]): 171 | # 下面的顺序需要严格保持 172 | # 将 location 转换成 folder 173 | self.convert_l2f(documents) 174 | self.load_folders() 175 | self.load_tags() 176 | self.load_resources() 177 | self.load_internal_links() 178 | self.load_notes() 179 | 180 | def close(self): 181 | self.conn.close() 182 | 183 | def build_location_to_top(self, location: str, document: Optional[WizDocument] = None): 184 | """ 构建一个 location 直到最顶端,并返回这个 location 对应的 l2f 对象 185 | """ 186 | l2f_inst = self.l2f_cache.get(location) 187 | if l2f_inst is None: 188 | l2f_inst = Location2Folder(location) 189 | self.l2f_cache[location] = l2f_inst 190 | self.conn.execute( 191 | 'INSERT INTO l2f(location, title, parent_location, level, id, parent_id) VALUES (:location, :title, :parent_location, :level, :id, :parent_id)', 192 | vars(l2f_inst) 193 | ) 194 | self.conn.commit() 195 | if l2f_inst is not None and l2f_inst.parent_location is not None: 196 | # 递归调用时,不传递 document 197 | self.build_location_to_top(l2f_inst.parent_location, None) 198 | # 仅当创建「最低端 folder」的时候才会更新 document 中的引用 199 | if document is not None: 200 | 
def get_tags(self, guid: str) -> dict[str, JoplinTag]:
    """Return all tags attached to a note, keyed by tag id.

    :param guid: id of the note whose ``note_tag`` rows are looked up.
    :returns: mapping of tag id to the cached ``JoplinTag`` from ``self.tags``.
    :raises KeyError: if a ``note_tag`` row references a tag id that is not
        present in ``self.tags``.
    """
    sql = 'SELECT tag_id, title FROM note_tag WHERE note_id=?;'
    items = self.conn.execute(sql, (guid, )).fetchall()
    logger.info(f'在数据库 note_tag 中找到 note {guid} 的 {len(items)} 条 tag 记录。')
    # Column 0 is tag_id and column 1 is the title; self.tags is keyed by
    # tag id, so the id (not the title) must be used for the lookup.
    return {tag_id: self.tags[tag_id] for tag_id, _title in items}
def load_folders(self) -> None:
    """Rebuild ``self.folders`` from the location-to-folder cache.

    Each entry of ``self.l2f_cache`` becomes one ``JoplinFolder`` keyed by the
    entry's ``id``. Both timestamps are set to 0.
    """
    rebuilt = {}
    self.folders = rebuilt
    rebuilt.update(
        (entry.id, JoplinFolder(entry.id, entry.title, 0, 0, entry.parent_id))
        for entry in self.l2f_cache.values()
    )
def add_tag(self, tag: JoplinTag) -> None:
    """Persist a tag that has not been recorded yet.

    If ``self.tags`` already holds an entry for ``tag.id`` a warning is logged
    and nothing is written. Otherwise the tag is inserted into the ``tag``
    table, added to the in-memory cache, and the transaction is committed.
    """
    known = self.tags.get(tag.id)
    if known is None:
        row = (tag.id, tag.title, tag.created_time, tag.updated_time)
        self.conn.execute(
            'INSERT INTO tag (tag_id, title, created_time, updated_time) VALUES (?, ?, ?, ?);',
            row,
        )
        self.tags[tag.id] = tag
        self.conn.commit()
    else:
        logger.warning(f'tag {tag.id} |{tag.title}| 已经存在,不需要新增。')
def add_note_tag(self, note: JoplinNote, tag: JoplinTag) -> None:
    """Record that ``tag`` is attached to ``note``.

    A warning is logged and nothing is written when the ``(note_id, tag_id)``
    pair already exists in ``note_tag``; otherwise the row is inserted and
    committed.
    """
    test_note_tag = "SELECT count(*) FROM note_tag WHERE note_id=? AND tag_id=?;"
    # count(*) always yields one row such as (0,). That tuple is truthy even
    # when the count is zero, so the count itself has to be unpacked before
    # testing it.
    (existing,) = self.conn.execute(test_note_tag, (note.id, tag.id)).fetchone()
    if existing:
        logger.warning(f'note {note.id}|{note.title}| 的 tag {tag.id}|{tag.title}| 已经存在!')
        return
    sql = 'INSERT INTO note_tag (note_id, tag_id, title, created_time) VALUES (?, ?, ?, ?);'
    self.conn.execute(sql, (note.id, tag.id, tag.title, tag.created_time))
    self.conn.commit()
class Adapter(object):
    """Converts WizNote objects into their Joplin counterparts.

    Folders, tags, attachments, images and notes are pushed to Joplin through
    ``JoplinDataAPI``; what has already been converted is tracked by
    ``ConvertUtil``.
    """

    ws: WizStorage
    jda: JoplinDataAPI
    work_dir: Path
    cu: ConvertUtil

    def __init__(self, ws: WizStorage, jda: JoplinDataAPI, work_dir: Path=None) -> None:
        """Resolve all WizNote documents and load the conversion cache.

        :param ws: WizNote storage; ``resolve()`` is called on it here.
        :param jda: client used for every Joplin request.
        :param work_dir: directory holding ``w2j.sqlite``; falls back to the
            package default when omitted.
        """
        self.ws = ws
        self.jda = jda
        self.work_dir = work_dir or default_work_dir

        # Parse every document first; the cache below is built from them.
        self.ws.resolve()

        # Load the conversion state from the database.
        self.cu = ConvertUtil(self.work_dir.joinpath('w2j.sqlite'))
        self.cu.init_cache(self.ws.documents)

    def sync_folders(self) -> None:
        """Create a Joplin folder for every location that has none yet.

        In WizNote a folder is only a ``location`` string on a document,
        whereas in Joplin a folder is a regular item, see
        https://joplinapp.org/api/references/rest_api/#item-type-ids

        :raises ValueError: if a location's parent is unknown or has not been
            given a Joplin id.
        """
        waiting_created_l2f = self.cu.get_waiting_for_created_l2f()
        logger.info(f'有 {len(waiting_created_l2f)} 个 folder 等待同步。')
        for l2f in waiting_created_l2f:
            logger.info(f'处理 location {l2f.location}')
            # Top-level locations have no parent.
            if l2f.parent_location is None:
                jf = self.jda.post_folder(title=l2f.title)
                self.cu.update_l2f(l2f.location, jf.id)
                continue

            parent_l2f: Location2Folder = self.cu.l2f_cache.get(l2f.parent_location)
            if parent_l2f is None:
                msg = f'找不到父对象 {l2f.parent_location}!'
                logger.error(msg)
                raise ValueError(msg)
            if parent_l2f.id is None:
                msg = f'父对象 {l2f.parent_location} 没有 id!'
                logger.error(msg)
                raise ValueError(msg)
            jf = self.jda.post_folder(title=l2f.title, parent_id=parent_l2f.id)
            self.cu.update_l2f(l2f.location, jf.id, jf.parent_id)
        # l2f_cache changed above, so the folder cache has to be rebuilt.
        self.cu.load_folders()

    def sync_tags(self) -> None:
        """Create a Joplin tag for every WizNote tag not yet recorded.

        A ``ValueError`` from Joplin is logged and the loop moves on to the
        next tag. If the error is a UNIQUE-constraint failure, the existing
        tag is fetched from Joplin and recorded locally.
        """
        created_keys = self.cu.tags.keys()
        waiting_create_tags = [wt for wt in self.ws.tags if tojoplinid(wt.guid) not in created_keys]
        logger.info(f'为知笔记共有 {len(self.ws.tags)} 个 tag 。')
        logger.info(f'有 {len(waiting_create_tags)} 个 tag 等待同步。')
        for wt in waiting_create_tags:
            tag_id = tojoplinid(wt.guid)
            try:
                logger.info(f'处理 tag {wt.name} {tag_id}')
                jt = self.jda.post_tag(id=tag_id, title=wt.name, created_time=wt.modified, updated_time=wt.modified)
                self.cu.add_tag(jt)
            except ValueError as e:
                logger.error(e)
                # The tag exists in Joplin but was never written to the
                # conversion database, so record it now.
                if 'SQLITE_CONSTRAINT: UNIQUE constraint failed' in str(e):
                    jt = self.jda.get_tag(tag_id)
                    self.cu.add_tag(jt)

    def _upload_wiz_attachment(self, attach: WizAttachment) -> JoplinResource:
        """Upload one WizNote attachment and return its Joplin resource.

        An attachment already present in the resource cache is not uploaded
        again; the cached resource is returned so callers can still link it.
        """
        resource_id = tojoplinid(attach.guid)
        jr: JoplinResource = self.cu.resources.get(resource_id)
        if jr is not None:
            logger.warning(f'resource {resource_id} |{jr.title}|已经存在!')
            # Returning None here made _sync_note fail on ``jr.id``.
            return jr
        jr = self.jda.post_resource(
            attach.file,
            1,
            id=resource_id,
            title=attach.name,
            filename=attach.name,
            created_time=attach.modified,
            updated_time=attach.modified
        )
        self.cu.add_resource(jr)
        return jr

    def _upload_wiz_image(self, image: WizImage) -> JoplinResource:
        """Upload one image embedded in a document and record the resource."""
        jr: JoplinResource = self.jda.post_resource(
            image.file,
            2,
            title=image.src,
            filename=image.src
        )
        self.cu.add_resource(jr)
        return jr

    def _sync_note(self, document: WizDocument) -> JoplinNote:
        """Convert one WizNote document into a Joplin note.

        Uploads the document's attachments and images, rewrites the body so
        its links point at the uploaded resources, posts the note and records
        it. A document that was already converted is returned from the cache
        without contacting Joplin.
        """
        logger.info(f'正在处理 document {document.guid}|{document.title}|。')
        note_id = tojoplinid(document.guid)
        jn: JoplinNote = self.cu.get_note(note_id)
        if jn is not None:
            logger.warning(f'note {jn.id} |{jn.title}| 已经存在!')
            return jn

        # Resources produced by the uploads below, keyed by resource id.
        resources_in_note: dict[str, JoplinResource] = {}

        # Images are never part of the WizNote links and attachments may not
        # be either, so every one of them is added here. Attachments always
        # get an extra link at the end of the body.
        joplin_internal_links: dict[str, JoplinInternalLink] = {}

        # Links that already exist inside the document body.
        for wil in document.internal_links:
            resource_id = tojoplinid(wil.guid)
            jil = JoplinInternalLink(note_id, resource_id, wil.title, wil.link_type, wil.outerhtml)
            joplin_internal_links[jil.id] = jil

        # Attachments.
        for attachment in document.attachments:
            jr = self._upload_wiz_attachment(attachment)
            resources_in_note[jr.id] = jr

            jil_id = f'{note_id}-{jr.id}'
            if joplin_internal_links.get(jil_id) is not None:
                logger.warning(f'内链关系 {jil_id} 已存在!')
                continue

            # Listed as an attachment but not referenced in the body: there
            # is no outer HTML, so the link is appended during conversion.
            jil = JoplinInternalLink(note_id, jr.id, jr.title, 'open_attachment')
            joplin_internal_links[jil.id] = jil

        # Images: each one becomes a Joplin resource.
        for image in document.images:
            jr = self._upload_wiz_image(image)
            resources_in_note[jr.id] = jr
            jil = JoplinInternalLink(note_id, jr.id, jr.title, 'image', image.outerhtml)
            joplin_internal_links[jil.id] = jil

        body = convert_joplin_body(
            document.body,
            document.is_markdown,
            joplin_internal_links.values()
        )

        folder = self.cu.get_folder(location=document.location)
        note: JoplinNote = self.jda.post_note(note_id, document.title, body, document.is_markdown, folder.id, document.url)
        note.internal_links = joplin_internal_links
        note.folder = folder
        note.tags = self.cu.get_tags(note.id)
        self.cu.add_note(note)

        return note

    def _get_locations(self, location: str, locations: list[str]) -> None:
        """Append every descendant location of ``location`` to ``locations``.

        :raises ValueError: if ``location`` is not in the l2f cache.
        """
        cur_l2f = self.cu.l2f_cache.get(location)
        if cur_l2f is None:
            raise ValueError(f'找不到 {location}')
        for l2f in self.cu.l2f_cache.values():
            if l2f.parent_location and l2f.level > cur_l2f.level and l2f.parent_location == location:
                locations.append(l2f.location)
                self._get_locations(l2f.location, locations)

    def sync_note_by_location(self, location: str, with_children: bool=True) -> None:
        """Convert every document stored under one WizNote location.

        :param location: the WizNote location to convert.
        :param with_children: also convert documents in nested locations.
        """
        self.sync_folders()
        self.sync_tags()
        locations = [location]
        if with_children:
            self._get_locations(location, locations)
        logger.info(f'处理以下 location: {locations}')
        # A set keeps the per-document membership test constant-time.
        wanted = set(locations)
        waiting_for_sync = [wd for wd in self.ws.documents if wd.location in wanted]
        logger.info(f'为知笔记目录 {location} 中有 {len(waiting_for_sync)} 篇笔记等待同步。')
        for wd in waiting_for_sync:
            self._sync_note(wd)

    def sync_all(self) -> None:
        """Convert folders, tags and every document."""
        self.sync_folders()
        self.sync_tags()
        logger.info(f'为知笔记转换所有文档 {len(self.ws.documents)} 篇。')
        for wd in self.ws.documents:
            self._sync_note(wd)
class JoplinFolder(object):
    """A Joplin notebook (called a "folder" by the Joplin data API)."""
    name = 'folder'
    type_ = 2

    # Folder guid in the Joplin database.
    id: str = None

    # Folder title.
    title: str = None

    # Creation timestamp.
    created_time: int = 0

    # Last-update timestamp.
    updated_time: int = 0

    # Id of the parent folder, if there is one.
    parent_id: str = None

    # Field names requested from the API for a folder.
    fields = ['id', 'title', 'created_time', 'updated_time', 'parent_id']

    def __init__(self, id: str, title: str, created_time: int, updated_time: int, parent_id: Optional[str] = None, **kwargs) -> None:
        """Store the folder properties; extra keyword arguments are ignored."""
        self.id = id
        self.title = title
        self.created_time = created_time
        self.updated_time = updated_time
        self.parent_id = parent_id

    @classmethod
    def fields_str(cls) -> str:
        """Return ``fields`` joined with commas, as used in a ``fields`` query."""
        return ','.join(cls.fields)

    def __repr__(self) -> str:
        # The previous implementation returned an empty string, which made
        # log output and debugger views useless.
        return (
            f'{type(self).__name__}(id={self.id!r}, title={self.title!r}, '
            f'parent_id={self.parent_id!r})'
        )
class JoplinNote(object):
    """A Joplin note together with the objects linked to it."""
    # Joplin note id.
    id: str = None

    # Note title.
    title: str = None

    # Note body.
    body: str = None

    # Creation timestamp in milliseconds.
    created_time: int = None

    # Last-update timestamp in milliseconds.
    updated_time: int = None

    # Source URL of the note.
    source_url: str = None

    # 1 means markdown, 2 means html.
    markup_language: int = 1

    location: str = ''
    parent_id: str = ''

    # The three containers are created per instance in __init__. Class-level
    # ``{}`` defaults would be one dict shared by every note.
    tags: "dict[str, JoplinTag]"
    resources: "dict[str, JoplinResource]"
    internal_links: "dict[str, JoplinInternalLink]"
    folder: "JoplinFolder" = None

    # Field names requested from the API for a note.
    fields = ['id', 'title', 'parent_id', 'created_time', 'updated_time', 'body', 'source_url', 'markup_language']

    def __init__(self, id: str, title: str, parent_id: str, markup_language: int, **kwargs) -> None:
        """Store the note properties.

        ``location``, ``source_url``, ``created_time`` and ``updated_time`` are
        taken from ``kwargs`` when present and truthy; any other keyword
        argument is ignored.
        """
        self.id = id
        self.title = title
        self.parent_id = parent_id
        self.markup_language = markup_language
        self.tags = {}
        self.resources = {}
        self.internal_links = {}
        for key in ('location', 'source_url', 'created_time', 'updated_time'):
            value = kwargs.get(key)
            if value:
                setattr(self, key, value)

    @classmethod
    def fields_str(cls):
        """Return ``fields`` joined with commas, as used in a ``fields`` query."""
        return ','.join(cls.fields)

    def __repr__(self) -> str:
        return (
            f'{type(self).__name__}(id={self.id!r}, title={self.title!r}, '
            f'parent_id={self.parent_id!r})'
        )
def get_folders(self, order_by: str='updated_time', order_dir: str='ASC', limit: int=100, page: int=1) -> \
        tuple[list[JoplinFolder], bool, int]:
    """Fetch folders from Joplin, one page or all pages.

    See https://joplinapp.org/api/references/rest_api/#pagination and
    https://joplinapp.org/api/references/rest_api/#get-folders

    A positive ``page`` fetches exactly that page. A ``page`` of 0 or less
    starts at page 1 and keeps requesting pages until ``has_more`` is false.

    :returns: joplin folder list, has_more, next_page
    """
    fetch_all = page <= 0
    current_page = 1 if fetch_all else page
    collected: list[JoplinFolder] = []

    while True:
        query = self._build_query(
            order_by=order_by,
            order_dir=order_dir,
            page=current_page,
            limit=limit,
            fields=JoplinFolder.fields_str(),
        )
        resp = self.client.get('/folders', params=query)
        items, has_more, next_page = self._check_pagination(int(query.get('page')), resp)
        for item in items:
            collected.append(JoplinFolder(**item))
        # A single-page request stops after one round trip.
        if not (fetch_all and has_more):
            return collected, has_more, next_page
        current_page = next_page
def post_resource(self, file: Path, resource_type: int, **kwargs) -> JoplinResource:
    """Upload a file to Joplin as a new resource.

    :param file: path of the file to upload.
    :param resource_type: stored on the returned object; it is not sent to
        Joplin.
    :param kwargs: resource properties, sent JSON-encoded as ``props``.
    :raises ValueError: if the response carries an ``error`` entry.
    """
    query = self._build_query()
    # In testing only ``title`` and ``id`` inside props had any effect;
    # the other properties were ignored by Joplin.
    form = {'props': json.dumps(kwargs)}
    # The handle used to be opened inline and never closed, leaking one file
    # descriptor per uploaded attachment or image.
    with open(file, 'rb') as stream:
        logger.info(f'向 Joplin 增加 resource {file} {kwargs}')
        resp = self.client.post('/resources', params=query, files={'data': stream}, data=form)
    data = resp.json()
    if data.get('error'):
        logger.error(data['error'])
        raise ValueError(data['error'])
    return JoplinResource(**data, resource_type=resource_type)
def post_note(self, id: str, title: str, body: str,
        is_markdown: bool, parent_id: str, source_url: str) -> JoplinNote:
    """Create a new note in Joplin.

    Markdown bodies are sent as ``body``. HTML bodies are sent as
    ``body_html`` together with ``convert_to='markdown'`` so that Joplin
    performs the HTML-to-markdown conversion.

    Undocumented parameters, found by capturing Joplin WebClipper traffic:

    * complete page HTML:
      ``source_command = {'name': 'completePageHtml', 'preProcessFor': 'html'}``,
      ``convert_to = html``
    * simplified page HTML:
      ``source_command = {'name': 'simplifiedPageHtml'}``,
      ``convert_to = markdown``
    * complete page as markdown:
      ``source_command = {'name': 'completePageHtml', 'preProcessFor': 'markdown'}``,
      ``convert_to = markdown``

    :raises ValueError: if the response carries an ``error`` entry.
    """
    payload = dict(id=id, title=title, parent_id=parent_id, markup_language=1)
    if source_url:
        payload['source_url'] = source_url
    if not is_markdown:
        # Let Joplin convert the HTML into markdown.
        payload.update(
            body_html=body,
            convert_to='markdown',
            source_command={'name': 'simplifiedPageHtml'},
        )
    else:
        payload['body'] = body

    logger.info(f'向 Joplin 增加 note {payload}')
    resp = self.client.post('/notes', params=self._build_query(), json=payload)
    data = resp.json()
    error = data.get('error')
    if error:
        logger.error(data['error'])
        raise ValueError(data['error'])
    return JoplinNote(**data)
class JoplinStorage(object):
    """Direct access to Joplin's own SQLite database."""
    # Directory that holds the Joplin data.
    joplin_dir: Path

    # Joplin's main database file.
    db_file: Path

    def __init__(self, joplin_dir: Path) -> None:
        """Remember ``joplin_dir`` and derive the path of ``database.sqlite``."""
        self.joplin_dir = joplin_dir
        self.db_file = self.joplin_dir.joinpath('database.sqlite')

    def update_time(self, wiz_document_times: list[dict[str, Union[str, int]]]):
        """Overwrite note timestamps with the values taken from WizNote.

        Each mapping supplies ``id``, ``created_time`` and ``updated_time``.
        The user-visible timestamp columns receive the same values. The
        number of affected rows is printed.
        """
        sql = "UPDATE notes SET created_time=:created_time, updated_time=:updated_time, user_created_time=:created_time, user_updated_time=:updated_time WHERE id=:id;"
        self.conn = sqlite3.connect(self.db_file)
        try:
            cursor = self.conn.executemany(sql, wiz_document_times)
            print(cursor.rowcount)
            self.conn.commit()
        finally:
            # Close on failure too, so a failed update does not leave the
            # database open.
            self.conn.close()
def parse_wiz_html(note_extract_dir: Path, title: str) -> tuple[str, list[WizInternalLink], list[WizImage]]:
    """Read a document's ``index.html`` and collect its links and images.

    :param note_extract_dir: directory the document was extracted into.
    :param title: document title, used only in the error message.
    :returns: the HTML with line breaks removed, the internal attachment and
        document links found in it, and the images found in it.
    :raises FileNotFoundError: if ``index.html`` does not exist.
    """
    index_html = note_extract_dir.joinpath('index.html')
    # ``is_file`` has to be called. The bare bound method is always truthy,
    # so the check could never fire.
    if not index_html.is_file():
        raise FileNotFoundError(f'主文档文件不存在! {index_html} |{title}|')
    html_body_bytes = index_html.read_bytes()
    # Old files were saved as UTF-16 LE with BOM and newer ones as UTF-8
    # with BOM, so the encoding has to be detected.
    enc = chardet.detect(html_body_bytes)
    # chardet reports ``None`` when it cannot decide (e.g. an empty file).
    html_body = html_body_bytes.decode(encoding=enc['encoding'] or 'utf-8')

    # Old files use \r\n line breaks that can split an HTML tag in two.
    # Dropping all line breaks keeps the regular expressions simple.
    html_body = html_body.replace('\r\n', '')
    html_body = html_body.replace('\n', '')

    internal_links: list[WizInternalLink] = []

    for match in re.finditer(RE_OPEN_ATTACHMENT_OUTERHTML, html_body, re.IGNORECASE):
        internal_links.append(WizInternalLink(
            match.group(0),
            match.group(2),
            match.group(3),
            match.group(1)))

    for match in re.finditer(RE_OPEN_DOCUMENT_OUTERHTML, html_body, re.IGNORECASE):
        internal_links.append(WizInternalLink(
            match.group(0),
            match.group(2),
            match.group(4),
            match.group(1)))

    images: list[WizImage] = [
        WizImage(match.group(0), match.group(1), note_extract_dir)
        for match in re.finditer(RE_IMAGE_OUTERHTML, html_body, re.IGNORECASE)
    ]
    return html_body, internal_links, images
class JoplinInternalLink(object):
    """An internal link inside a Joplin note body.

    Unlike Wiz internal links, Joplin internal links cover attachments
    (pointing at a resource), images (pointing at a resource) and documents
    (pointing at a note).
    """
    note_id: str
    resource_id: str

    # One of: image / open_attachment / open_document
    link_type: str

    # Title of the link
    title: str = None

    # Full text of the link as it appears in the body; it may be markdown
    # or html, depending on the format of the note identified by note_id
    outertext: str

    def __init__(self, note_id: str, resource_id: str, title: str, link_type: str, outertext: str = '') -> None:
        """Store the link data.

        :param note_id: id of the note that contains the link
        :param resource_id: id of the resource or note the link points to
        :param title: link title
        :param link_type: ``image``, ``open_attachment`` or ``open_document``
        :param outertext: full original text of the link, empty if unknown
        """
        self.note_id = note_id
        self.resource_id = resource_id
        self.title = title
        self.link_type = link_type
        self.outertext = outertext

    @property
    def id(self) -> str:
        """Return ``note_id`` and ``resource_id`` joined with a dash."""
        return f'{self.note_id}-{self.resource_id}'
  • {gen_ilstr(is_markdown, jil)}
  • ' for jil in jils]) 200 | return f'

    附件链接

      {body}
    ' 201 | 202 | 203 | def convert_joplin_body(body: str, is_markdown: bool, internal_links: list[JoplinInternalLink]) -> str: 204 | """ 将为知笔记中的 body 转换成 Joplin 内链 205 | """ 206 | insert_to_end: list[JoplinInternalLink] = [] 207 | for jil in internal_links: 208 | # 替换链接 209 | if jil.outertext: 210 | body = body.replace(jil.outertext, gen_ilstr(is_markdown, jil)) 211 | # 所有的附件,需要在body 底部加入链接 212 | if jil.link_type == 'open_attachment': 213 | insert_to_end.append(jil) 214 | # 处理 markdown 转换 215 | if is_markdown: 216 | body = get_text(body) 217 | if insert_to_end: 218 | body += gen_end_ilstr(is_markdown, insert_to_end) 219 | return body -------------------------------------------------------------------------------- /w2j/wiz.py: -------------------------------------------------------------------------------- 1 | ############################## 2 | # w2j.wiz 3 | # 处理为知笔记相关 4 | ############################## 5 | 6 | from os import PathLike 7 | from typing import Any, Optional 8 | from pathlib import Path 9 | import sqlite3 10 | from zipfile import ZipFile, BadZipFile 11 | 12 | from w2j import logger, work_dir as default_work_dir 13 | from w2j.parser import parse_wiz_html, tots, WizInternalLink, WizImage 14 | 15 | 16 | class WizAttachment(object): 17 | """ 为知笔记附件 18 | 19 | 在为知笔记中,附件属于一种资源,拥有自己的 guid 20 | """ 21 | # 附件的 guid 22 | guid: str = None 23 | 24 | # 附件所属的文档 guid 25 | doc_guid: str = None 26 | 27 | # 附件的名称,一般是文件名 28 | name: str = None 29 | 30 | # 附件在硬盘上的文件名,格式为 {guid}name 31 | file_name: str = None 32 | 33 | # 附件的修改时间 34 | modified: int = 0 35 | 36 | # 附件的文件名所在地 37 | file: Path = None 38 | 39 | def __init__(self, guid: str, doc_guid: str, name: str, modified: str, attachments_dir: Path, check_file: bool = False) -> None: 40 | self.guid = guid 41 | self.doc_guid = doc_guid 42 | self.name = name 43 | self.modified = tots(modified) 44 | self.file_name =f'{{{self.guid}}}{self.name}' 45 | 46 | self.file = attachments_dir.joinpath(self.file_name) 47 | if check_file: 48 
class WizDocument(object):
    """A WizNote document."""
    # guid of the document
    guid: str = None
    title: str = None

    # Folder; a WizNote folder is simply a string separated by "/"
    location: str = None

    # Holds a Folder object; only filled after the Adapter has converted
    # the location into a Folder
    folder: Any = None

    # Source url if the document was clipped from the web
    url: str = None

    created: int = 0

    modified: int = 0

    # Number of attachments read from the database; greater than 0 means
    # the document has attachments
    attachment_count: int = 0

    # The document archive
    note_file: Path = None

    # Directory the document archive is extracted to
    note_extract_dir: Path = None

    # Main directory that all documents are extracted into
    documents_dir: Path

    # Document body
    body: str = None

    # Whether the document is markdown; defaults to markdown
    is_markdown: bool = True

    # The class-level lists below are kept for backward compatibility only;
    # __init__ gives every instance its own lists so nothing is shared.

    # Tags of the document
    tags: list[WizTag] = []

    # Attachments of the document
    attachments: list[WizAttachment] = []

    # Image files contained in the document, extracted from the body with
    # regular expressions
    images: list[WizImage] = []

    # Internal links contained in the document, extracted from the body
    # with regular expressions
    internal_links: list[WizInternalLink] = []

    def __init__(self, guid: str, title: str, location: str, url: str, created: str, modified: str, attachment_count: int, notes_dir: Path, documents_dir: Path, check_file: bool = False) -> None:
        """Build a document from a database row.

        :param guid: document guid
        :param title: document title; a trailing ``.md`` marks markdown
        :param location: folder string
        :param url: source url of the document
        :param created: creation time string, converted with ``tots``
        :param modified: modification time string, converted with ``tots``
        :param attachment_count: attachment count stored in the database
        :param notes_dir: directory holding the note archives
        :param documents_dir: directory archives are extracted into
        :param check_file: verify right away that the archive exists
        :raises FileNotFoundError: if ``check_file`` is set and the archive
            is missing
        """
        self.guid = guid
        self.location = location
        self.url = url
        self.created = tots(created)
        self.modified = tots(modified)
        self.attachment_count = attachment_count

        self.documents_dir = documents_dir

        # Per-instance containers, so no instance ever mutates the
        # class-level defaults.
        self.tags = []
        self.attachments = []
        self.images = []
        self.internal_links = []

        self.is_markdown = title.endswith('.md')
        # Strip the ".md" suffix unless the title is nothing but the suffix
        if self.is_markdown and len(title) > 3:
            self.title = title[:-3]
        else:
            self.title = title

        # The archive file is named "{guid}", braces included
        self.note_file = notes_dir.joinpath(f'{{{self.guid}}}')
        if check_file:
            self.check_note_file()

    def check_note_file(self):
        """Raise ``FileNotFoundError`` if the note archive does not exist."""
        if self.note_file is None or not self.note_file.exists():
            raise FileNotFoundError(f'找不到 note 文件 {self.note_file}!')

    def resolve_attachments(self, attachments: list[WizAttachment]) -> None:
        """Attach the attachments and verify them.

        :raises ValueError: if the number of attachments differs from
            ``attachment_count``
        :raises FileNotFoundError: if an attachment file is missing
        """
        self.attachments = attachments
        if len(self.attachments) != self.attachment_count:
            raise ValueError(f'附件数量不匹配 {len(self.attachments)} != {self.attachment_count}!')
        # Check that every attachment file exists
        try:
            for attach in self.attachments:
                attach.check_file()
        except FileNotFoundError as e:
            msg = f'{e!s},请检查文档 {self.title}'
            # Chain the original error so the traceback keeps its cause
            raise FileNotFoundError(msg) from e

    def resolve_tags(self, tags: list[WizTag]) -> None:
        """Store the tags of this document."""
        self.tags = tags

    def _extract_zip(self) -> None:
        """Extract the document archive into ``documents_dir/<guid>``.

        Extraction is skipped when the target directory already exists.

        :raises BadZipFile: if the archive cannot be read as a zip file
        """
        self.note_extract_dir = self.documents_dir.joinpath(self.guid)
        # Nothing to do if the target directory already exists
        if self.note_extract_dir.exists():
            return
        try:
            # The context manager closes the archive handle even on failure
            with ZipFile(self.note_file) as zip_file:
                zip_file.extractall(self.note_extract_dir)
        except BadZipFile as e:
            msg = f'ZIP 文件错误,可能是需要密码。 {self.note_file!s} |{self.title}|'
            raise BadZipFile(msg) from e

    def _parse_wiz_note(self) -> None:
        """Parse ``index.html`` and fill body, internal_links and images.

        :raises FileNotFoundError: if the archive has not been extracted yet
        """
        if self.note_extract_dir is None:
            raise FileNotFoundError(f'请先解压缩文档 {self.note_file!s} |{self.title}|')

        self.body, self.internal_links, self.images = parse_wiz_html(self.note_extract_dir, self.title)

    def resolve_body(self) -> None:
        """Extract the archive and parse the document body.

        Images found in the body become ``WizImage`` objects and the body
        text is stored in ``body``.
        """
        self.check_note_file()
        self._extract_zip()
        self._parse_wiz_note()

    def resolve(self, attachments: list[WizAttachment], tags: list[WizTag]) -> None:
        """Resolve attachments, tags and body, in that order."""
        self.resolve_attachments(attachments)
        self.resolve_tags(tags)
        self.resolve_body()

    def __repr__(self):
        return f'<WizDocument {self.guid} |{self.title}|>'
class WizStorage(object):
    """Holds all data of one WizNote account."""
    # Working directory; temporary files are placed inside it
    work_dir: Path

    # WizNote documents are extracted into this directory
    documents_dir: Path

    wiznote_dir: Path
    user_id: str
    user_dir: Path
    group_dir: Path

    # Whether this is a group storage
    is_group_storage: bool = False

    data_dir: DataDir

    # The class-level containers below are kept for backward compatibility
    # only; __init__ gives every instance its own containers.

    # All tags
    tags: list[WizTag] = []
    # Key: document guid, value: tags of that document
    tags_in_document: dict[str, list[WizTag]] = {}

    # All attachments
    attachments: list[WizAttachment] = []
    # Key: document guid, value: attachments of that document
    attachments_in_document: dict[str, list[WizAttachment]] = {}

    # All images
    images: list[WizImage] = []
    # Key: document guid, value: images of that document
    images_in_document: dict[str, list[WizImage]] = {}

    # All documents
    documents: list[WizDocument] = []

    def __init__(self, user_id: str, wiznote_dir: Path, is_group_storage: bool = False, work_dir: Path = None):
        """Locate the WizNote data directories of an account.

        :param user_id: account e-mail
        :param wiznote_dir: directory that contains the account directory
        :param is_group_storage: use the group storage of the account
        :param work_dir: working directory used for extraction; falls back
            to the package default ``work_dir`` when not given
        :raises ValueError: if a group storage is requested but no
            ``BIZ_GUID`` is stored for ``user_id``
        :raises FileNotFoundError: raised by ``DataDir`` when a required
            directory or database is missing
        """
        self.work_dir = work_dir or default_work_dir

        # Dedicated directory that WizNote documents are extracted into
        self.documents_dir = self.work_dir.joinpath('documents')
        if not self.documents_dir.exists():
            self.documents_dir.mkdir(parents=True)

        self.wiznote_dir = wiznote_dir
        self.user_id = user_id
        self.user_dir = self.wiznote_dir.joinpath(user_id)
        self.group_dir = self.user_dir.joinpath('group')
        self.is_group_storage = is_group_storage

        # Per-instance containers, so instances never share state
        self.tags = []
        self.tags_in_document = {}
        self.attachments = []
        self.attachments_in_document = {}
        self.images = []
        self.images_in_document = {}
        self.documents = []

        # Root data directory
        root_data_dir = DataDir(self.user_dir.joinpath('data/'))
        # The group storage is looked up through the root index database
        if self.is_group_storage:
            biz_guid = self._get_biz_guid(root_data_dir.index_db)
            if biz_guid is None:
                # Fail with a clear message instead of the TypeError that
                # joinpath(None) would raise
                raise ValueError(f'找不到用户 {self.user_id} 的 BIZ_GUID!')
            self.data_dir = DataDir(self.group_dir.joinpath(biz_guid))
        else:
            self.data_dir = root_data_dir

    def _get_biz_guid(self, index_db: Path) -> Optional[str]:
        """Query the ``BIZ_GUID`` stored for ``user_id``.

        :param index_db: path of the index database to query
        :return: the ``BIZ_GUID``, or ``None`` if the user has no row
        """
        conn = sqlite3.connect(index_db)
        try:
            cur = conn.cursor()
            cur.execute('SELECT BIZ_GUID FROM WIZ_USER where USER_ID=?', (self.user_id,))
            row = cur.fetchone()
        finally:
            # Close the connection even when the query fails
            conn.close()
        if row is not None:
            return row[0]
        return None

    def _build_tags(self) -> tuple[list[WizTag], dict[str, list[WizTag]]]:
        """Build the tag list and the per-document tag mapping.

        :return: ``(tags, tags_dict)``; ``tags_dict`` maps a document guid
            to the list of tags of that document
        :raises KeyError: if a document references an unknown tag guid
        """
        tags: list[WizTag] = []
        # Temporary lookup table from tag guid to tag
        key_tags: dict[str, WizTag] = {}
        for row in self.data_dir._get_all_tag():
            tag = WizTag(*row)
            tags.append(tag)
            key_tags[tag.guid] = tag

        tags_dict: dict[str, list[WizTag]] = {}
        for doc_guid, tag_guid in self.data_dir._get_all_document_tag():
            # A missing tag_guid raises KeyError here; in that case the tag
            # settings in WizNote need to be checked
            tags_dict.setdefault(doc_guid, []).append(key_tags[tag_guid])
        return tags, tags_dict

    def _build_attachments(self) -> tuple[list[WizAttachment], dict[str, list[WizAttachment]]]:
        """Build the attachment list and the per-document mapping.

        :return: ``(attachments, attachments_in_document)``; the dict maps
            a document guid to the attachments of that document
        """
        attachments: list[WizAttachment] = []
        attachments_in_document: dict[str, list[WizAttachment]] = {}

        for row in self.data_dir._get_all_attachment():
            attachment = WizAttachment(*row, self.data_dir.attachments_dir)
            attachments.append(attachment)
            attachments_in_document.setdefault(attachment.doc_guid, []).append(attachment)
        return attachments, attachments_in_document

    def build_documents(self) -> list[WizDocument]:
        """Build and resolve every document found in the database.

        Also refreshes ``attachments``, ``attachments_in_document``,
        ``tags`` and ``tags_in_document`` on this instance.
        """
        rows = self.data_dir._get_all_document()

        self.attachments, self.attachments_in_document = self._build_attachments()
        self.tags, self.tags_in_document = self._build_tags()

        documents: list[WizDocument] = []
        for row in rows:
            document = WizDocument(*row, self.data_dir.notes_dir, self.documents_dir, check_file=True)
            document.resolve(
                self.attachments_in_document.get(document.guid, []),
                self.tags_in_document.get(document.guid, [])
            )
            documents.append(document)
        return documents

    def build_document(self, guid: str) -> WizDocument:
        """Build and resolve a single document.

        :param guid: guid of the document
        :raises ValueError: if no document with this guid exists
        """
        document_row, attachment_rows, tag_rows = self.data_dir._get_one_document(guid)
        if not document_row:
            raise ValueError(f'找不到文档 {guid}!')

        attachments: list[WizAttachment] = [
            WizAttachment(*row, self.data_dir.attachments_dir, check_file=False)
            for row in attachment_rows
        ]
        tags: list[WizTag] = [WizTag(*row) for row in tag_rows]

        document = WizDocument(*document_row, self.data_dir.notes_dir, self.documents_dir, check_file=True)
        document.resolve(attachments, tags)
        return document

    def resolve(self) -> None:
        """Build all documents and store them in ``documents``."""
        self.documents = self.build_documents()

    def clear(self) -> None:
        """Delete the temporary directory that documents were extracted to.

        Does nothing if the directory no longer exists.
        """
        import shutil

        # documents_dir is a directory tree; Path.unlink() only removes
        # files, so the tree has to be removed recursively.
        if self.documents_dir.exists():
            shutil.rmtree(self.documents_dir)