├── .editorconfig ├── .gitignore ├── .travis.yml ├── AUTHORS ├── LICENSE ├── README.md ├── docs ├── CNAME ├── concept.md ├── features.md ├── images │ ├── favicon.png │ ├── iscc-algo-design.svg │ ├── iscc-algo-design2.svg │ ├── iscc-badge.svg │ ├── iscc-creation-instance-id.svg │ ├── iscc-creation-process.svg │ ├── iscc-decentralized-issuance.svg │ ├── iscc-image-clustering.svg │ ├── iscc-pcf.svg │ ├── iscc-similarity-hash.svg │ ├── iscc-web-demo.svg │ ├── logo-black.svg │ └── logo-white.svg ├── index.md ├── license.md ├── resources.md ├── specification.md └── stylesheets │ └── custom.css ├── mkdocs.yml ├── poetry.lock ├── pyproject.toml ├── src └── iscc │ ├── __init__.py │ ├── const.py │ ├── iscc.py │ └── iscc.pyi ├── tests ├── __init__.py ├── build_test_data.py ├── file_image_cat.gif ├── file_image_cat.jpg ├── file_image_cat.png ├── file_image_lenna.jpg ├── file_image_pixel_png_black.png ├── file_image_pixel_png_transp.png ├── file_image_pixel_png_white.png ├── test_data.json ├── test_inputs.json └── test_iscc.py └── tools ├── .gitignore ├── __init__.py ├── convert.py └── stamp.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # see http://editorconfig.org 2 | 3 | # Top-level config 4 | root = true 5 | 6 | # All files 7 | [*] 8 | charset = utf-8 9 | indent_style = space 10 | indent_size = 4 11 | end_of_line = lf 12 | insert_final_newline = true 13 | trim_trailing_whitespace = true 14 | 15 | # Windows scripts 16 | [*.{bat,cmd,ps1}] 17 | end_of_line = crlf 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 
20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # PyCharm 107 | .idea 108 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Travis-CI Configuration 2 | language: python 3 | 4 | cache: 5 | pip: true 6 | directories: 7 | - "$HOME/.cache/pypoetry" 8 | 9 | matrix: 10 | include: 11 | - python: "3.5" 12 | - python: "3.6" 13 | - python: "3.7" 14 | dist: xenial 15 | - python: "3.8" 16 | dist: bionic 17 | 18 | install: 19 | - curl -sSL 
https://raw.githubusercontent.com/sdispater/poetry/master/get-poetry.py -o get-poetry.py 20 | - python get-poetry.py --yes 21 | - source $HOME/.poetry/env 22 | - poetry install 23 | 24 | script: pytest -q tests/ 25 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | The ISCC Specification and Reference Implementation was originally created in 2016 by 2 | the Content Blockchain Project. 3 | 4 | Author: Titusz Pan 5 | 6 | Here is a list of much-appreciated contributors: 7 | 8 | Alexander Niederbühl 9 | Lars Wallin 10 | Laura Dawson 11 | Patricia Schinke 12 | Sebastian Posth 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2020, Titusz Pan & Contributors 2 | 3 | Reference source code is licensed under BSD-2-clause 4 | 5 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 6 | following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 9 | disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 15 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 17 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 18 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 19 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 20 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 21 | 22 | 23 | Documentation is licensed under CC BY-NC-SA 4.0: 24 | 25 | Attribution-NonCommercial-ShareAlike 4.0 International 26 | 27 | ======================================================================= 28 | 29 | Creative Commons Corporation ("Creative Commons") is not a law firm and 30 | does not provide legal services or legal advice. Distribution of 31 | Creative Commons public licenses does not create a lawyer-client or 32 | other relationship. Creative Commons makes its licenses and related 33 | information available on an "as-is" basis. Creative Commons gives no 34 | warranties regarding its licenses, any material licensed under their 35 | terms and conditions, or any related information. Creative Commons 36 | disclaims all liability for damages resulting from their use to the 37 | fullest extent possible. 38 | 39 | Using Creative Commons Public Licenses 40 | 41 | Creative Commons public licenses provide a standard set of terms and 42 | conditions that creators and other rights holders may use to share 43 | original works of authorship and other material subject to copyright 44 | and certain other rights specified in the public license below. The 45 | following considerations are for informational purposes only, are not 46 | exhaustive, and do not form part of our licenses. 
47 | 48 | Considerations for licensors: Our public licenses are 49 | intended for use by those authorized to give the public 50 | permission to use material in ways otherwise restricted by 51 | copyright and certain other rights. Our licenses are 52 | irrevocable. Licensors should read and understand the terms 53 | and conditions of the license they choose before applying it. 54 | Licensors should also secure all rights necessary before 55 | applying our licenses so that the public can reuse the 56 | material as expected. Licensors should clearly mark any 57 | material not subject to the license. This includes other CC- 58 | licensed material, or material used under an exception or 59 | limitation to copyright. More considerations for licensors: 60 | wiki.creativecommons.org/Considerations_for_licensors 61 | 62 | Considerations for the public: By using one of our public 63 | licenses, a licensor grants the public permission to use the 64 | licensed material under specified terms and conditions. If 65 | the licensor's permission is not necessary for any reason--for 66 | example, because of any applicable exception or limitation to 67 | copyright--then that use is not regulated by the license. Our 68 | licenses grant only permissions under copyright and certain 69 | other rights that a licensor has authority to grant. Use of 70 | the licensed material may still be restricted for other 71 | reasons, including because others have copyright or other 72 | rights in the material. A licensor may make special requests, 73 | such as asking that all changes be marked or described. 74 | Although not required by our licenses, you are encouraged to 75 | respect those requests where reasonable. 
More considerations 76 | for the public: 77 | wiki.creativecommons.org/Considerations_for_licensees 78 | 79 | ======================================================================= 80 | 81 | Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International 82 | Public License 83 | 84 | By exercising the Licensed Rights (defined below), You accept and agree 85 | to be bound by the terms and conditions of this Creative Commons 86 | Attribution-NonCommercial-ShareAlike 4.0 International Public License 87 | ("Public License"). To the extent this Public License may be 88 | interpreted as a contract, You are granted the Licensed Rights in 89 | consideration of Your acceptance of these terms and conditions, and the 90 | Licensor grants You such rights in consideration of benefits the 91 | Licensor receives from making the Licensed Material available under 92 | these terms and conditions. 93 | 94 | 95 | Section 1 -- Definitions. 96 | 97 | a. Adapted Material means material subject to Copyright and Similar 98 | Rights that is derived from or based upon the Licensed Material 99 | and in which the Licensed Material is translated, altered, 100 | arranged, transformed, or otherwise modified in a manner requiring 101 | permission under the Copyright and Similar Rights held by the 102 | Licensor. For purposes of this Public License, where the Licensed 103 | Material is a musical work, performance, or sound recording, 104 | Adapted Material is always produced where the Licensed Material is 105 | synched in timed relation with a moving image. 106 | 107 | b. Adapter's License means the license You apply to Your Copyright 108 | and Similar Rights in Your contributions to Adapted Material in 109 | accordance with the terms and conditions of this Public License. 110 | 111 | c. BY-NC-SA Compatible License means a license listed at 112 | creativecommons.org/compatiblelicenses, approved by Creative 113 | Commons as essentially the equivalent of this Public License. 114 | 115 | d. 
Copyright and Similar Rights means copyright and/or similar rights 116 | closely related to copyright including, without limitation, 117 | performance, broadcast, sound recording, and Sui Generis Database 118 | Rights, without regard to how the rights are labeled or 119 | categorized. For purposes of this Public License, the rights 120 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 121 | Rights. 122 | 123 | e. Effective Technological Measures means those measures that, in the 124 | absence of proper authority, may not be circumvented under laws 125 | fulfilling obligations under Article 11 of the WIPO Copyright 126 | Treaty adopted on December 20, 1996, and/or similar international 127 | agreements. 128 | 129 | f. Exceptions and Limitations means fair use, fair dealing, and/or 130 | any other exception or limitation to Copyright and Similar Rights 131 | that applies to Your use of the Licensed Material. 132 | 133 | g. License Elements means the license attributes listed in the name 134 | of a Creative Commons Public License. The License Elements of this 135 | Public License are Attribution, NonCommercial, and ShareAlike. 136 | 137 | h. Licensed Material means the artistic or literary work, database, 138 | or other material to which the Licensor applied this Public 139 | License. 140 | 141 | i. Licensed Rights means the rights granted to You subject to the 142 | terms and conditions of this Public License, which are limited to 143 | all Copyright and Similar Rights that apply to Your use of the 144 | Licensed Material and that the Licensor has authority to license. 145 | 146 | j. Licensor means the individual(s) or entity(ies) granting rights 147 | under this Public License. 148 | 149 | k. NonCommercial means not primarily intended for or directed towards 150 | commercial advantage or monetary compensation. 
For purposes of 151 | this Public License, the exchange of the Licensed Material for 152 | other material subject to Copyright and Similar Rights by digital 153 | file-sharing or similar means is NonCommercial provided there is 154 | no payment of monetary compensation in connection with the 155 | exchange. 156 | 157 | l. Share means to provide material to the public by any means or 158 | process that requires permission under the Licensed Rights, such 159 | as reproduction, public display, public performance, distribution, 160 | dissemination, communication, or importation, and to make material 161 | available to the public including in ways that members of the 162 | public may access the material from a place and at a time 163 | individually chosen by them. 164 | 165 | m. Sui Generis Database Rights means rights other than copyright 166 | resulting from Directive 96/9/EC of the European Parliament and of 167 | the Council of 11 March 1996 on the legal protection of databases, 168 | as amended and/or succeeded, as well as other essentially 169 | equivalent rights anywhere in the world. 170 | 171 | n. You means the individual or entity exercising the Licensed Rights 172 | under this Public License. Your has a corresponding meaning. 173 | 174 | 175 | Section 2 -- Scope. 176 | 177 | a. License grant. 178 | 179 | 1. Subject to the terms and conditions of this Public License, 180 | the Licensor hereby grants You a worldwide, royalty-free, 181 | non-sublicensable, non-exclusive, irrevocable license to 182 | exercise the Licensed Rights in the Licensed Material to: 183 | 184 | a. reproduce and Share the Licensed Material, in whole or 185 | in part, for NonCommercial purposes only; and 186 | 187 | b. produce, reproduce, and Share Adapted Material for 188 | NonCommercial purposes only. 189 | 190 | 2. Exceptions and Limitations. 
For the avoidance of doubt, where 191 | Exceptions and Limitations apply to Your use, this Public 192 | License does not apply, and You do not need to comply with 193 | its terms and conditions. 194 | 195 | 3. Term. The term of this Public License is specified in Section 196 | 6(a). 197 | 198 | 4. Media and formats; technical modifications allowed. The 199 | Licensor authorizes You to exercise the Licensed Rights in 200 | all media and formats whether now known or hereafter created, 201 | and to make technical modifications necessary to do so. The 202 | Licensor waives and/or agrees not to assert any right or 203 | authority to forbid You from making technical modifications 204 | necessary to exercise the Licensed Rights, including 205 | technical modifications necessary to circumvent Effective 206 | Technological Measures. For purposes of this Public License, 207 | simply making modifications authorized by this Section 2(a) 208 | (4) never produces Adapted Material. 209 | 210 | 5. Downstream recipients. 211 | 212 | a. Offer from the Licensor -- Licensed Material. Every 213 | recipient of the Licensed Material automatically 214 | receives an offer from the Licensor to exercise the 215 | Licensed Rights under the terms and conditions of this 216 | Public License. 217 | 218 | b. Additional offer from the Licensor -- Adapted Material. 219 | Every recipient of Adapted Material from You 220 | automatically receives an offer from the Licensor to 221 | exercise the Licensed Rights in the Adapted Material 222 | under the conditions of the Adapter's License You apply. 223 | 224 | c. No downstream restrictions. You may not offer or impose 225 | any additional or different terms or conditions on, or 226 | apply any Effective Technological Measures to, the 227 | Licensed Material if doing so restricts exercise of the 228 | Licensed Rights by any recipient of the Licensed 229 | Material. 230 | 231 | 6. No endorsement. 
Nothing in this Public License constitutes or 232 | may be construed as permission to assert or imply that You 233 | are, or that Your use of the Licensed Material is, connected 234 | with, or sponsored, endorsed, or granted official status by, 235 | the Licensor or others designated to receive attribution as 236 | provided in Section 3(a)(1)(A)(i). 237 | 238 | b. Other rights. 239 | 240 | 1. Moral rights, such as the right of integrity, are not 241 | licensed under this Public License, nor are publicity, 242 | privacy, and/or other similar personality rights; however, to 243 | the extent possible, the Licensor waives and/or agrees not to 244 | assert any such rights held by the Licensor to the limited 245 | extent necessary to allow You to exercise the Licensed 246 | Rights, but not otherwise. 247 | 248 | 2. Patent and trademark rights are not licensed under this 249 | Public License. 250 | 251 | 3. To the extent possible, the Licensor waives any right to 252 | collect royalties from You for the exercise of the Licensed 253 | Rights, whether directly or through a collecting society 254 | under any voluntary or waivable statutory or compulsory 255 | licensing scheme. In all other cases the Licensor expressly 256 | reserves any right to collect such royalties, including when 257 | the Licensed Material is used other than for NonCommercial 258 | purposes. 259 | 260 | 261 | Section 3 -- License Conditions. 262 | 263 | Your exercise of the Licensed Rights is expressly made subject to the 264 | following conditions. 265 | 266 | a. Attribution. 267 | 268 | 1. If You Share the Licensed Material (including in modified 269 | form), You must: 270 | 271 | a. retain the following if it is supplied by the Licensor 272 | with the Licensed Material: 273 | 274 | i. 
identification of the creator(s) of the Licensed 275 | Material and any others designated to receive 276 | attribution, in any reasonable manner requested by 277 | the Licensor (including by pseudonym if 278 | designated); 279 | 280 | ii. a copyright notice; 281 | 282 | iii. a notice that refers to this Public License; 283 | 284 | iv. a notice that refers to the disclaimer of 285 | warranties; 286 | 287 | v. a URI or hyperlink to the Licensed Material to the 288 | extent reasonably practicable; 289 | 290 | b. indicate if You modified the Licensed Material and 291 | retain an indication of any previous modifications; and 292 | 293 | c. indicate the Licensed Material is licensed under this 294 | Public License, and include the text of, or the URI or 295 | hyperlink to, this Public License. 296 | 297 | 2. You may satisfy the conditions in Section 3(a)(1) in any 298 | reasonable manner based on the medium, means, and context in 299 | which You Share the Licensed Material. For example, it may be 300 | reasonable to satisfy the conditions by providing a URI or 301 | hyperlink to a resource that includes the required 302 | information. 303 | 3. If requested by the Licensor, You must remove any of the 304 | information required by Section 3(a)(1)(A) to the extent 305 | reasonably practicable. 306 | 307 | b. ShareAlike. 308 | 309 | In addition to the conditions in Section 3(a), if You Share 310 | Adapted Material You produce, the following conditions also apply. 311 | 312 | 1. The Adapter's License You apply must be a Creative Commons 313 | license with the same License Elements, this version or 314 | later, or a BY-NC-SA Compatible License. 315 | 316 | 2. You must include the text of, or the URI or hyperlink to, the 317 | Adapter's License You apply. You may satisfy this condition 318 | in any reasonable manner based on the medium, means, and 319 | context in which You Share Adapted Material. 320 | 321 | 3. 
You may not offer or impose any additional or different terms 322 | or conditions on, or apply any Effective Technological 323 | Measures to, Adapted Material that restrict exercise of the 324 | rights granted under the Adapter's License You apply. 325 | 326 | 327 | Section 4 -- Sui Generis Database Rights. 328 | 329 | Where the Licensed Rights include Sui Generis Database Rights that 330 | apply to Your use of the Licensed Material: 331 | 332 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 333 | to extract, reuse, reproduce, and Share all or a substantial 334 | portion of the contents of the database for NonCommercial purposes 335 | only; 336 | 337 | b. if You include all or a substantial portion of the database 338 | contents in a database in which You have Sui Generis Database 339 | Rights, then the database in which You have Sui Generis Database 340 | Rights (but not its individual contents) is Adapted Material, 341 | including for purposes of Section 3(b); and 342 | 343 | c. You must comply with the conditions in Section 3(a) if You Share 344 | all or a substantial portion of the contents of the database. 345 | 346 | For the avoidance of doubt, this Section 4 supplements and does not 347 | replace Your obligations under this Public License where the Licensed 348 | Rights include other Copyright and Similar Rights. 349 | 350 | 351 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 352 | 353 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 354 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 355 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 356 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 357 | IMPLIED, STATUTORY, OR OTHER. 
THIS INCLUDES, WITHOUT LIMITATION, 358 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 359 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 360 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 361 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 362 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 363 | 364 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 365 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 366 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 367 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 368 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 369 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 370 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 371 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 372 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 373 | 374 | c. The disclaimer of warranties and limitation of liability provided 375 | above shall be interpreted in a manner that, to the extent 376 | possible, most closely approximates an absolute disclaimer and 377 | waiver of all liability. 378 | 379 | 380 | Section 6 -- Term and Termination. 381 | 382 | a. This Public License applies for the term of the Copyright and 383 | Similar Rights licensed here. However, if You fail to comply with 384 | this Public License, then Your rights under this Public License 385 | terminate automatically. 386 | 387 | b. Where Your right to use the Licensed Material has terminated under 388 | Section 6(a), it reinstates: 389 | 390 | 1. automatically as of the date the violation is cured, provided 391 | it is cured within 30 days of Your discovery of the 392 | violation; or 393 | 394 | 2. upon express reinstatement by the Licensor. 
395 | 396 | For the avoidance of doubt, this Section 6(b) does not affect any 397 | right the Licensor may have to seek remedies for Your violations 398 | of this Public License. 399 | 400 | c. For the avoidance of doubt, the Licensor may also offer the 401 | Licensed Material under separate terms or conditions or stop 402 | distributing the Licensed Material at any time; however, doing so 403 | will not terminate this Public License. 404 | 405 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 406 | License. 407 | 408 | 409 | Section 7 -- Other Terms and Conditions. 410 | 411 | a. The Licensor shall not be bound by any additional or different 412 | terms or conditions communicated by You unless expressly agreed. 413 | 414 | b. Any arrangements, understandings, or agreements regarding the 415 | Licensed Material not stated herein are separate from and 416 | independent of the terms and conditions of this Public License. 417 | 418 | 419 | Section 8 -- Interpretation. 420 | 421 | a. For the avoidance of doubt, this Public License does not, and 422 | shall not be interpreted to, reduce, limit, restrict, or impose 423 | conditions on any use of the Licensed Material that could lawfully 424 | be made without permission under this Public License. 425 | 426 | b. To the extent possible, if any provision of this Public License is 427 | deemed unenforceable, it shall be automatically reformed to the 428 | minimum extent necessary to make it enforceable. If the provision 429 | cannot be reformed, it shall be severed from this Public License 430 | without affecting the enforceability of the remaining terms and 431 | conditions. 432 | 433 | c. No term or condition of this Public License will be waived and no 434 | failure to comply consented to unless expressly agreed to by the 435 | Licensor. 436 | 437 | d. 
Nothing in this Public License constitutes or may be interpreted 438 | as a limitation upon, or waiver of, any privileges and immunities 439 | that apply to the Licensor or You, including from the legal 440 | processes of any jurisdiction or authority. 441 | 442 | ======================================================================= 443 | 444 | Creative Commons is not a party to its public 445 | licenses. Notwithstanding, Creative Commons may elect to apply one of 446 | its public licenses to material it publishes and in those instances 447 | will be considered the “Licensor.” The text of the Creative Commons 448 | public licenses is dedicated to the public domain under the CC0 Public 449 | Domain Dedication. Except for the limited purpose of indicating that 450 | material is shared under a Creative Commons public license or as 451 | otherwise permitted by the Creative Commons policies published at 452 | creativecommons.org/policies, Creative Commons does not authorize the 453 | use of the trademark "Creative Commons" or any other trademark or logo 454 | of Creative Commons without its prior written consent including, 455 | without limitation, in connection with any unauthorized modifications 456 | to any of its public licenses or any other arrangements, 457 | understandings, or agreements concerning use of licensed material. For 458 | the avoidance of doubt, this paragraph does not form part of the 459 | public licenses. 460 | 461 | Creative Commons may be contacted at creativecommons.org. 
 462 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ISCC - Spec and Reference Code 2 | 3 | [![Build](https://travis-ci.org/iscc/iscc-specs.svg?branch=master)](https://travis-ci.org/iscc/iscc-specs) 4 | [![Version](https://img.shields.io/pypi/v/iscc.svg)](https://pypi.python.org/pypi/iscc/) 5 | [![License](https://img.shields.io/pypi/l/iscc.svg)](https://pypi.python.org/pypi/iscc/) 6 | [![Downloads](https://pepy.tech/badge/iscc)](https://pepy.tech/project/iscc) 7 | [![DOI](https://zenodo.org/badge/96668860.svg)](https://zenodo.org/badge/latestdoi/96668860) 8 | 9 | | WARNING: The code and specs in this repository are an **out of date** early draft and retained for historic reasons only. For the current reference implementation see: [iscc-core](https://github.com/iscc/iscc-core). For status of specs see: [ISO/DIS 24138](https://www.iso.org/standard/77899.html) | 10 | | --- | 11 | 12 | The **International Standard Content Code** is a proposal for an [open standard](https://en.wikipedia.org/wiki/Open_standard) for decentralized content identification. This repository contains the specification of the proposed **ISCC Standard** and a reference implementation in Python3. The latest published version of the specification can be found at [iscc.codes](https://iscc.codes). 13 | 14 | 15 | 16 | ## Installing the reference code 17 | 18 | The reference code is published with the package name [iscc](https://pypi.python.org/pypi/iscc) on the Python Package Index. Install it with: 19 | 20 | ``` bash 21 | pip install iscc 22 | ``` 23 | 24 | ## Using the reference code 25 | 26 | A short example on how to create an ISCC Code with the reference implementation. 
 27 | 28 | ``` python 29 | import iscc 30 | 31 | # Generate ISCC Component Codes 32 | mid, title, extra = iscc.meta_id('Title of Content') 33 | cid = iscc.content_id_text('some text') 34 | did = iscc.data_id('path/to/mediafile.doc') 35 | iid, tophash = iscc.instance_id('path/to/mediafile.doc') 36 | 37 | # Join ISCC Components to fully qualified ISCC Code 38 | iscc_code = '-'.join([mid, cid, did, iid]) 39 | print('ISCC:{}'.format(iscc_code)) 40 | ``` 41 | 42 | ## Working with the specification 43 | 44 | The entire **ISCC Specification** is written in plain text [Markdown](https://en.wikipedia.org/wiki/Markdown). The markdown content is then built and published with the excellent [mkdocs](http://www.mkdocs.org/) documentation tool. If you have some basic command line skills you can build and run the specification site on your own computer. Make sure you have [git](https://git-scm.com/) and [Python](https://www.python.org/) installed on your system and follow these steps on the command line: 45 | 46 | ``` bash 47 | git clone https://github.com/iscc/iscc-specs.git 48 | cd iscc-specs 49 | pip install -r requirements.txt 50 | mkdocs serve 51 | ``` 52 | 53 | All specification documents can be found in the `./docs` subfolder of the repository. The recommended editor for the markdown files is [Typora](https://typora.io/). If you have commit rights to the [main repository](https://github.com/iscc/iscc-specs) you can deploy the site with a simple `mkdocs gh-deploy`. 54 | 55 | ## Contribute 56 | 57 | Pull requests and other contributions are welcome. Use the [GitHub Issues](https://github.com/iscc/iscc-specs/issues) section of this project to discuss ideas for the **ISCC Specification**. You may also want to join our developer chat on Telegram. 58 | 59 | ## License 60 | 61 | All documentation is licensed under the [CC BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/). 62 | 63 | Reference code is licensed under BSD-2-Clause. 
64 | -------------------------------------------------------------------------------- /docs/CNAME: -------------------------------------------------------------------------------- 1 | iscc.codes 2 | -------------------------------------------------------------------------------- /docs/concept.md: -------------------------------------------------------------------------------- 1 | # ISCC - Concept 2 | 3 | *The internet is shifting towards a network of decentralized peer-to-peer transactions. If we want our transactions on the emerging blockchain networks to be about content we need standardized ways to address content. Our transactions might be payments, attributions, reputation, certification, licenses or entirely new kinds of value transfer. All this will happen much faster and easier if we, as a community, can agree on how to identify content in a decentralized environment.* 4 | 5 | This is the higher level concept of an open proposal to the wider content community for a common content identifier. We would like to share our ideas and spark a conversation with journalists, news agencies, content creators, publishers, distributors, libraries, musicians, scientists, developers, lawyers, rights organizations and all the other participants of the content ecosystem. 6 | 7 | ## Introduction 8 | 9 | The **structure and management** of **global identifiers** strongly correlates with the grade of achievable **automation** and the potential for **innovation** within and across different sectors of the media industries. 10 | 11 | There are many [existing standards](https://xkcd.com/927/) for media identifiers serving a wide array of use cases. 
Book publishing uses the [**ISBN**](https://www.isbn-international.org/), magazines and journals have the [**ISSN**](https://www.issn.org/), music industry has [**ISRC**](https://isrc.ifpi.org/) and [**ISWC**](http://www.iswc.org/) and film has [**ISAN**](http://www.isan.org/) and [**EIDR**](https://eidr.org/) – each of them serving a set of specific purposes. On the other side of the spectrum there are also generic identifiers standards such as the [**DOI**](https://www.doi.org/), [**ITU HANDLE**](http://www.itu.int/osg/csd/emerging_trends/handle_system/index.html), [**URN**](https://tools.ietf.org/html/rfc8141), [**ARK**](https://tools.ietf.org/html/draft-kunze-ark-18). The DOI, for example, can be used to identify any digital, physical or abstract *object*. All these identifiers have important and distinct roles across different industries and use cases. 12 | 13 | The most substantial differentiator of the **ISCC** is the fact that it is **algorithmically bound to the digital content** it identifies. Other standards require human intervention to assign and track the mapping between identifier and object (binding). Many of those standards focus on how to resolve an identifier to some network location where metadata or the object itself can be found. The **ISCC inverts this principle**. It gives an answer to the question: "Given some digital content, how can I find its identifier to reference the content in a transaction?". This means that the **ISCC** for any digital content can be *found* (generated) from the content itself, without the need to involve any third-party. 14 | 15 | As such the **ISCC** fulfills a distinct role and is **not a replacement for established identifiers**. Rather it is designed as an umbrella standard to augment established identifiers with enhanced algorithmic features. It can be used in the metadata of existing standards or support discoverability (reverse lookup). 
16 | 17 | Many of the established systems are based on centralized or hierarchical registries that involve manual and costly management processes. To sustain such systems the costs have to be recouped by fees for identifier assignment, metadata storage or paid access to metadata which inhibits accessibility and discoverability. The overhead, cost and general properties of these systems make them prohibitive for many innovative use cases that require a more informal and generic identifier assignment (e.g. granular content). Communities with short-lived or user-generated content don't have any agreed-upon global identifiers for their content. 18 | 19 | The fast paced development of the digital media economy has led to an increasing fragmentation of identifiers and new barriers in interoperability. For example major e-book retailers do not require an **ISBN** and instead established their own proprietary identifiers. Amazon has the **ASIN**, Apple has **Apple-ID** and Google has **GKEY**. For many tasks current systems need to track and match all the different vendor specific IDs, which is an inefficient and error prone process. 20 | 21 | Resolving an **ISCC** to a network location, metadata or the content itself can be accomplished with neutral and decentralized blockchain-based registries that don't require a centralized or hierarchical system to manage, track and store unique identifiers, ownership assignments, associated metadata and other information. 22 | 23 | Advances in data structures, algorithms, machine learning and the emergence of crypto economics allow us to invent **new** kinds of **media identifiers** and **re-imagine existing identifiers** with innovative use cases in mind. Blockchains and Smart Contracts offer great opportunities in solving many of the challenges of identifier registration, like centralized management, data duplication and disambiguation, vendor lock-in and long term data retention. 
24 | 25 | This is an open proposal to the digital media community and explores the possibilities of a **decentralized** content identifier system. We’d like to establish an open standard for persistent, unique, vendor independent and content derived cross-media identifiers that can be stored and managed on global, public and decentralized blockchains. We envision a self-governing ecosystem with a low barrier of entry where **commercial and non-commercial** initiatives can both innovate and thrive next to each other. 26 | 27 | ## Media Identifiers for Blockchains 28 | 29 | Media cataloging systems tend to get out of hand and become complex and often unmanageable. Our design proposal is focused on keeping the ISCC system as simple and more importantly as **automatable** as possible, while maximizing practical value for the most important use cases — meaning you should get out more than you have to put in. With this in mind we come to the following basic design decisions: 30 | 31 | ### A “Meaningful” Identifier 32 | 33 | In traditional database systems it is recommended practice to work with **surrogate keys** as identifiers. A surrogate key is a dumb number and has no business meaning and is completely decoupled from the data it identifies. Uniqueness of such identifiers is guaranteed either via centralized incremental assignment by the database system or via random UUIDs which have a very low probability of collisions. While random UUIDs could be generated in a decentralized way, both approaches require some external authority that establishes or certifies the linkage between the identifier and the associated metadata and content. This is why we decided to go with a “meaningful” **content and metadata derived identifier (CMDI)**. Anyone will be able to verify that a specific identifier indeed belongs to a given digital content. Even better, anyone can “find” the identifier for a given content without the need to consult external data sources. 
This approach also captures essential information about the media in the identifier itself, which is very useful in scenarios of machine learning and data analytics. 34 | 35 | ### A Decentralized Identifier 36 | 37 | The **ISCC** is designed to be registry agnostic. This means that content identification codes can be self-issued in a decentralized and parallel fashion without the need for governance by a centralized registration agency. Without registration an **ISCC** is owned by the content and not by a person or organization. An *unregistered* **ISCC** is useful in cases where multiple independent parties exchange information about content. The **CMDI** approach is helpful with common issues like data integrity, validation, de-duplication and disambiguation. Systems that process digital content can integrate ISCC support and benefit immediately. The integrator does not depend on all third-parties having to assign, track and deliver ISCC codes, because those can be generated from the content itself. 38 | 39 | ISCC registration becomes **necessary** when an ISCC code needs to be **globally unique, publicly discoverable, resolvable, owned** or **authenticated**. While these features inevitably require some kind of registry, not all of them require a centralized institutional registry. 40 | 41 | In a centralized system the central authority is in control of the issuance of identifiers and safeguards various requirements like identifier uniqueness or ownership. In a decentralized system where everybody can register an identifier we need a different approach. 42 | 43 | The **ISCC** will specify the necessary protocols to implement the aforementioned features in a decentralized, federated environment and across multiple public blockchains. **Given a registered ISCC code, an application can unambiguously determine on what blockchain (if any), by which account, and at what time an ISCC has been registered. 
** 44 | 45 | Registered ISCC codes have to indicate an authoritative public blockchain network. This indicator is part of the ISCC Code itself, such that codes registered on different networks cannot collide. This guarantees uniqueness of ISCC codes across multiple blockchains. 46 | 47 | **Ownership** of ISCC codes (not the identified content) is granted to the signatory of the first transaction for a given ISCC code on the corresponding blockchain. 48 | 49 | **Global uniqueness** of ISCC codes is accomplished by the blockchain indicator in combination with a client side counter. Registration clients first check for a prior registration of a given ISCC code on a given blockchain. If the ISCC code is already registered by another account the client may simply increment a suffix of the code before registration. 50 | 51 | Applications are instructed to ignore duplicate registrations of identical codes that occur on a blockchain after an initial registration. 52 | 53 | This approach retains global clustering and de-duplication features while at the same time offering **owned**, **authenticated** and **globally unique** ISCC codes. The model also allows for verifiable transfers of ISCC ownership. Given an appropriate protocol it is even possible to switch the authoritative blockchain for an ISCC after initial registration without changing the ISCC code itself. 54 | 55 | ### Registration Services 56 | 57 | Registration services offer a plethora of valuable and indispensable benefits. Every industry has its special requirements. Ultimately the stakeholders from those industries will have to set the rules for data curation, metadata management and administrative control. A Blockchain is a low level backend infrastructure. And while blockchains might make access to identifiers and metadata more accessible, there is still cost involved with storing data, running the infrastructure and providing middleware and frontends. Blockchains work as incentive based economic systems. 
Registrars can offer **commercially viable** value added services on top of the lower level blockchain networks. For example: 58 | 59 | - Identity verification of registrants 60 | - Certification/attestation of registry entries 61 | - Data curation and indexing services 62 | - Blockchain key-management services 63 | - Custodial blockchain account management 64 | - Middleware and front-end applications 65 | - Infrastructure operations 66 | - Participation in blockchain network governance 67 | 68 | ### Storage Considerations 69 | 70 | On a typical public blockchain all data is **fully replicated** among participants. This allows for independent and autonomous validation of transactions. All blockchain data is highly available, immutable, tamper-proof, timestamped and in most cases openly accessible. However, under high load the limited transaction capacity (storage space per unit of time) creates a transaction fee market for on-chain data. This leads to **growing transaction costs** and makes storage a scarce and increasingly precious resource on public decentralized blockchains. For example storing a 46 character identifier on the Ethereum blockchain in July 2019 cost ~ $0.50. So it is mandatory for our identifier and its eventual metadata schema to be very **space efficient** to maximize benefit at minimal cost. The basic metadata that will be required to generate and register identifiers must be: 71 | 72 | - minimal in scope 73 | - clearly specified 74 | - robust against human error 75 | - enforced on a technical level 76 | - adequate for public use (no legal or privacy issues) 77 | 78 | ## Layers of Digital Media Identification 79 | 80 | While we examined existing identifiers we discovered that there is often much confusion about the extent or coverage of what exactly is being identified by a given system. 
With our idea for a generic cross-media identifier we want to put special weight on being precise with our definitions and found it helpful to distinguish between “different layers of digital media identification". We found that these layers exist naturally on a scale from abstract to concrete. Our analysis also showed that existing standard identifiers operate on one or at most two of such layers. The ISCC is designed as a **composite identifier** that takes the different layers of media identification into consideration: 81 | 82 | ### Layer 1 – Abstract Creation 83 | 84 | In the first and most abstract layer we are concerned with distinguishing between different works or creations in the **broadest possible sense**. The scope of identification is completely independent of any manifestations of the work, be it physical or digital in nature. It is also agnostic to creators, rights holders or any specific interpretations, expressions or language versions of a work. It only relates to the intangible creation - the idea itself. 85 | 86 | ### Layer 2 – Semantic Field 87 | 88 | This layer relates to the meaning or essence of a work. It is an amorphous collection or combination of facts, concepts, categories, subjects, topics, themes, assumptions, observations, conclusions, beliefs and other intangible things that the content conveys. The scope of identification is a set of coordinates within a finite and multidimensional semantic space. 89 | 90 | ### Layer 3 – Generic Manifestation 91 | 92 | In this layer we are concerned with the literal structure of a media type specific and normalized manifestation. Namely the basic text, image, audio or video content independent of its semantic meaning or media file encoding and with a tolerance to variation. This "tolerance to variation" bundles a set of different versions with corrections, revisions, edits, updates, personalization, different format encodings or data compression of the same content under one grouping identifier. 
A generic manifestation is independent of a final digital media product and is specific to an expression, version or interpretation of a work. 93 | 94 | Unfortunately it is not obvious where one generic manifestation of a work ends and another one starts. It depends on human interpretation and context. How much editing do we allow before we call it a “different” manifestation and give it a different identifier? A practical but only partial solution to this problem is to create an algorithmically defined and testable spectrum of tolerance to variation per media type. This can provide a stable and repeatable process to distinguish between generic content manifestations. But it is important to understand that such a process is not expected to yield results that are always intuitive to human expectations as to where exactly boundaries should be. 95 | 96 | ### Layer 4 – Media Specific Manifestation 97 | 98 | This layer relates to a **manifestation with a specific encoding**. It identifies a **data-file** encoded and offered in a specific **media format** including a tolerance to variation to account for minor edits and updates within a format without creating a new identifier. For example, one could distinguish between the PDF, DOCX or WEBSITE versions of the same content as generated from a single source publishing system. This layer only distinguishes between products or "artifacts" with a given packaging or encoding. 99 | 100 | ### Layer 5 – Exact Representation 101 | 102 | In this layer we identify a data-file by its exact binary representation without any interpretation of meaning and without any ambiguity. Even a minimal change in data that might not change the interpretation of content would create a different identifier. Like the first four layers, this layer does **not** express any information related to **content location** or **ownership**. 
103 | 104 | ### Layer 6 – Individual Copy 105 | 106 | In the physical world we would call a specific book (one that you can take off your shelf) an **individual copy**. This implies a notion of **locality** and **ownership**. In the digital world the semantics of an individual copy are very different. An individual copy might be distinguished by a license you own or by a personalized watermark applied by the retailer at time of sale or some digital annotations you have added to your digital media file. While there can only ever be **one exact** individual copy of a **physical object**, there always can be **endless replicas** of an "individual copy" of a **digital object**. It is very important to keep this difference in mind. Ignoring this fact has caused countless misunderstandings and is the source of confusion throughout the media industry – especially in the realm of copyright and license discussions. 107 | 108 | We could try to define an **individual digital copy** by its location and exact content on a specific physical storage medium (like a DVD, SSD ...). But this does not account for the fact that it is nearly impossible to stop someone from creating an exact replica of that data or at least a snapshot or recording of the presentation of that data on another storage location. 109 | 110 | And most importantly such a replica does not affect the original data and even less can make it magically disappear. In contrast, if you give your individual copy of your book to someone else, you won't **"have it"** anymore. It is clear that with digital media this **cannot reliably be the case**. The only way would be to build a [tamper-proof physical device](https://opendime.com/) (secure element) that does not reveal the data itself, which would defeat the purpose by making the content itself unavailable. But there are ways to partially simulate such inherently physical properties in the digital world. 
Most notably with the emergence of blockchain technology it is now possible to have a **cryptographically secured** and publicly notarized tamper-proof **certificate of ownership.** This can serve as a record of agreement about ownership of an “individual copy”. But it does not by itself enforce location or accessibility of the content, nor does it prove the authorization of the certifying party itself or the legal validity of the agreement. 111 | 112 | ## Design Principles 113 | 114 | As a generic content identifier the **ISCC Standard** is an initiative with a broad scope. These are the principles that should guide its design and adoption: 115 | 116 | - Target existing, unsolved, real-world problems 117 | - Provide a technological and automatable solution 118 | - Be generic and useful to a broad audience 119 | - Keep the standard pragmatic and simple to implement 120 | - Keep it extendable and forward compatible 121 | - Provide marketable user-facing sample applications 122 | - Provide machine readable test data for implementers 123 | - Provide developer tools in different programming languages 124 | - Promote implementations in different sectors 125 | - The specification should be open and public 126 | - Engage with other standards and interested parties 127 | 128 | ## Algorithmic Tools 129 | 130 | While many details about the ISCC are still up for discussion we are quite confident about some of the general algorithmic families that will make it into the final specification for the identifier. These will play an important role in how we generate the different components of the identifier: 131 | 132 | - Similarity preserving hash functions (Simhash, Minhash ...) 133 | - Perceptual hashing (pHash, Blockhash, Chromaprint …) 134 | - Content defined chunking (Rabin-Karp, FastCDC ...) 
135 | - Merkle trees 136 | 137 | ## ISCC Proof-of-Concept 138 | 139 | Before we settle on the details of the proposed ISCC identifier, we built a simple and reduced proof-of-concept implementation of our ideas. It enables us and other developers to test with real world data and systems and find out early what works and what doesn't. 140 | 141 | ![img](images/iscc-web-demo.svg) 142 | 143 | !!! Update 144 | 145 | An interactive demo of the concept is available at https://isccdemo.content-blockchain.org/ 146 | 147 | The minimal viable, first iteration ISCC will be a byte structure built from the following components: 148 | 149 | ### Meta-ID 150 | 151 | The Meta-ID will be generated as a similarity preserving hash from minimal generic metadata like *title* and *creators*. It operates on **Layer 1** and identifies an intangible creation. It is the first and most generic grouping element of the identifier. We will be experimenting with different n-gram sizes and bit-length to find the practical limits of precision and recall for generic metadata. We will also specify a process to disambiguate unintended collisions by adding optional metadata. 152 | 153 | ### Partial Content Flag 154 | 155 | The Partial Content Flag is a 1-bit flag that indicates whether the remaining elements relate to the complete work or only to a subset of it. 156 | 157 | ### Media Type Flag 158 | 159 | The Media Type Flag is a 3-bit flag that allows us to distinguish between up to 8 generic media types **(GMTs)** to which our Content-ID component applies. We define a generic media type as *basic content types* such as plain text or raw pixel data that is specified exactly and extracted from more complex file formats or encodings. We start with generic text and image types and add audio, video and mixed types later. 160 | 161 | ### Content-ID 162 | 163 | The Content-ID operates on **Layer 3** and will be a GMT-specific similarity preserving hash generated from extracted content. 
It identifies the normalized content of a specific GMT, independent of file format or encoding. It relates to the structural essence of the content and groups similar GMT-specific manifestations of the abstract creation or parts of it (as indicated by the Partial Content Flag). For practical reasons we intentionally skip a **Layer 2** component at this time. It would add unnecessary complexity for a basic proof-of-concept implementation. 164 | 165 | ### Data-ID 166 | 167 | The Data-ID operates on **Layer 4** and will be a similarity preserving hash generated from shift-resistant content-defined chunks from the raw data of the encoded media blob. It groups complete encoded files with similar content and encoding. This component does not distinguish between GMTs as the files may include multiple different generic media types. 168 | 169 | ### Instance-ID 170 | 171 | The Instance-ID operates on **Layer 5** and will be the top hash of a Merkle tree generated from (potentially content-defined) chunks of raw data of an encoded media blob. It identifies a concrete manifestation and proves the integrity of the full content. We use the Merkle tree structure because it also allows us to verify integrity of partial chunks without having to have the full data available. This will be very useful in any scenarios of distributed data storage. 172 | 173 | We intentionally skip **Layer 6** at this stage as content ownership and location will be handled on the blockchain layer of the stack and not by the ISCC identifier itself. 
174 | -------------------------------------------------------------------------------- /docs/features.md: -------------------------------------------------------------------------------- 1 | title: ISCC - Features 2 | description: Features of the International Standard Content Code 3 | authors: Titusz Pan 4 | 5 | # ISCC - Features 6 | 7 | **The ISCC comes with a number of built-in features:** 8 | 9 | ## Granular Content Management 10 | 11 | The ISCC can be generated for any work as well as for parts, chunks or individual elements of the content. These elements could be an image, a table, a chapter or a quote within a given document. The relation between parent and child-elements can be preserved in the ISCC identifier. Thus, it is possible to connect the various IDs, obtain their relations and identify the work from which any chunk is taken. This feature can also help to identify plagiarism, in case chunks from one work have been used in a different work - only with access to the ISCC identifiers. 12 | 13 | [![ISCC Granular Content Identification](images/iscc-pcf.svg)](images/iscc-pcf.svg) 14 | 15 | ## Content Identification 16 | 17 | The ISCC is a content code that is created from the content file itself. Processing the content with the algorithms defined by the ISCC specification creates a unique composite code, consisting of four major elements. The ISCC identifies content across multiple, hierarchical layers: From the embedded metadata, the normalized content, the encoded file format up to the individual file. It can be used to automatically distinguish different versions of the same content, to ensure data integrity, to de-duplicate, or to disambiguate content in a given content repository. 18 | 19 | 20 | ## Decentralized Issuance 21 | 22 | [![ISCC Decentralized Issuance](images/iscc-decentralized-issuance.svg)](images/iscc-decentralized-issuance.svg){: .right} 23 | 24 | 25 | The ISCC is managed in a decentralized fashion. 
This means that anyone with access to the content will be able to create and verify an ISCC based on the content files themselves. The ISCC can be created offline on any local device or app that supports the suggested standard. The ISCC also ensures that if content files are sent, distributed or otherwise shared among different parties or repositories any participant can be sure to refer to the exact same content file. This will radically simplify digital distribution. 26 | 27 | 28 | ## Designed for Blockchain 29 | 30 | The ISCC is designed to be used in a blockchain environment, but also creates value if being used locally, off-chain or even offline. The ISCC is short enough to be written on any blockchain while preserving its unique features. Or it can be used off-chain within a local content repository for internal processing or non-blockchain digital transactions. 31 | 32 | [![ISCC Algorithmic Design](images/iscc-algo-design.svg)](images/iscc-algo-design.svg) 33 | 34 | 35 | ## Content Versioning 36 | 37 | During content creation, review processes or distribution, same or similar files are being exchanged among various parties (editors, distributors, retailers, etc.). With the ISCC registered on the blockchain it is possible to timestamp all content versions and variants in order to create an auditable history of related documents over time. This helps to identify content variations on a time scale in order to make sure that users are referring to the correct same file or related versions of the same content. 38 | 39 | 40 | ## Related Product Identification 41 | 42 | The Content-ID is one component of the ISCC. It is a similarity-preserving hash generated from extracted content. It identifies the normalized content of a specific file, independent of file format or encoding. 
As the Content-ID will remain the same for the same content in various formats, the ISCC automatically connects related formats, like PDF-, MS-word or EPUB-files or JPEG- and PNG-files, etc. 43 | 44 | [![ISCC Image Clustering](images/iscc-image-clustering.svg)](images/iscc-image-clustering.svg) 45 | 46 | ## Content Variant Detection 47 | 48 | The similarity preserving hash of the Content-ID of the ISCC is able to cluster similar variants of content. It identifies the same or similar content and also shows on a scale from 1-64 (or 1-100%) how similar two content variants are. At the same time an application can distinguish between similar but not identical content through the Instance-ID component of the ISCC. This can help to identify e.g. watermarked files. 49 | 50 | 51 | ## Proof of Data Possession 52 | 53 | With the ISCC and a standardized signing algorithm it will be possible to verify whether a user that created the ISCC entry on the blockchain actually had access to the respective content file. 
54 | -------------------------------------------------------------------------------- /docs/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/docs/images/favicon.png -------------------------------------------------------------------------------- /docs/images/logo-black.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /docs/images/logo-white.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | title: ISCC - Content Identifiers 2 | description: The intelligent digital media identifier. 3 | authors: Titusz Pan 4 | 5 | # ISCC - Content Identifiers 6 | 7 | ## A Proposal for a Modern and Open Content-Based Identifier 8 | 9 | ![iscc-sample](images/iscc-algo-design2.svg) 10 | 11 | ## The ISCC is... 12 | 13 | - a universal identifier for multiple generic media-types (text, image, audio, video) 14 | - a lightweight and similarity-preserving fingerprint designed for digital content 15 | - designed for cross-sector applicability (journalism, books, music, film, etc.) 
16 | - designed to identify content in decentralized and networked environments 17 | - and most importantly it is free, open-source and transparent 18 | 19 | **You want to try it instantly?** Head over to our demo at https://iscc.coblo.net 20 | 21 | ## Motivation 22 | 23 | Increasing amounts of dynamic, short-lived, and granular content need to be managed and require new and innovative tools. 24 | 25 | A crucial prerequisite for content-related transactions to succeed in this new and demanding environment is the capability to address and identify content efficiently. Yet many industries that deal with digital content do not even have standard identifiers. There is no existing solution for those industries that deal with short-lived or granular content such as journalism. There is also no widely adopted standardized identifier for digital images. 26 | 27 | The overhead and cost of manually assigning and tracking identifiers for such content are prohibitive. But there is a solution to the problem: **auto-generated identifiers** created algorithmically from the content itself. 28 | 29 | With the advent of blockchain technology, the Internet is moving towards a network of peer-to-peer transactions. 30 | 31 | In a multi-sided ecosystem, **anybody** may have a legitimate interest to generate, lookup, or register an identifier for some digital content – whether they own the content or not. 32 | 33 | Authorship or copyright is **not** a requirement to create or use an identifier. But **an identifier is a requirement** to communicate and agree on authorship, origin, copyright, and other information. 34 | 35 | Technology allows us to authoritatively map **identifiers to digital content** using open, standardized fingerprinting algorithms. 36 | 37 | Open and accessible **standard identifiers,** designed to manage small and sometimes transient pieces of digital content are fundamental for transactions and sales activities in our increasingly heterogeneous media environment. 
38 | 39 | By using standardized, decentralized, algorithmic identifiers for digital content, all ecosystem participants can engage more efficiently in content-related transactions. 40 | 41 | ## Key Features and Differentiators 42 | 43 | - Decentralized issuance through algorithmic creation 44 | - Generic content identification (text, images, audio, video) 45 | - Algorithmic similarity detection and deduplication 46 | - Low management costs 47 | - Low barrier of entry 48 | - Designed for blockchain based registration 49 | 50 | ## How it works 51 | 52 | **ISCC** identifiers are generated algorithmically **from the content itself**. Content files are processed to build the identifier. The ISCC does not have to be manually assigned, neither does it have to be carried around or embedded within the content. The content itself is the source and authority of the **ISCC Code**. 53 | 54 | The **ISCC Code** is a unique, hierarchically structured, composite identifier. It is built from a generic and balanced mix of content-derived, locality-sensitive and similarity-preserving hashes generated from metadata and content. 55 | 56 | The latest version of these pages can be found at [iscc.codes](http://iscc.codes) 57 | -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | **TITLE**: ISCC - Content Identifiers 4 | 5 | **ISCC**: CCDFPFc87MhdT-CTg5dAueXoTwU-CDFo1xv7eF4sD 6 | 7 | **CC BY-NC-SA 4.0 License** 8 | 9 | Copyright © 2016 - 2020 The Authors, Content Blockchain Project 10 | 11 | Creative Commons License
This work is licensed under a Creative Commons (CC BY-NC-SA 4.0). 12 | -------------------------------------------------------------------------------- /docs/resources.md: -------------------------------------------------------------------------------- 1 | title: ISCC - Resources 2 | description: ISCC software, demos, tools, developer libs, integrations, presentations, articles and other resources 3 | authors: Titusz Pan 4 | 5 | # ISCC - Resources 6 | 7 | If you find something that is missing from this collection of resources for the ISCC, [please add it](https://github.com/iscc/iscc-specs/edit/master/docs/resources.md). 8 | 9 | ## ISCC - Official Software & Tools 10 | 11 | ### [ISCC - CLI](https://github.com/iscc/iscc-cli) 12 | 13 | An open-source command-line tool that can be used on **Windows**, **Linux**, and **Mac** systems by developers and computer savvy persons to create ISCC codes from media files and URLs. The tool is based on the [reference implementation](https://github.com/iscc/iscc-specs) but also includes **new and experimental features** (e.g., Audio-Codes, Video-Codes) that are not yet part of the [public specification](https://iscc.codes/specification/). 14 | 15 | ### [ISCC - Web Service](https://github.com/iscc/iscc-service) 16 | 17 | A REST OpenAPI backend service application for creating [**ISCC codes**](https://iscc.codes/) for digital media files and URLs. The Webservice is built with [FastAPI](https://github.com/tiangolo/fastapi) and makes use of the [ISCC reference implementation](https://github.com/iscc/iscc-specs) and the [ISCC Command Line Tool](https://github.com/iscc/iscc-cli) and includes interactive API documentation. 18 | 19 | ### [ISCC - Specification & Reference Implementation](https://github.com/iscc/iscc-specs) 20 | 21 | The official ISCC reference implementation. The reference code is published on the [Python Package Index](https://pypi.org/project/iscc/) and can be installed as a library by developers. 
The ISCC specification is written in Markdown and hosted in the same [source code repository](https://github.com/iscc/iscc-specs) and published at [https://iscc.codes/specification](https://iscc.codes/specification/). For contributions and public discussions, please use the corresponding [issue tracker](https://github.com/iscc/iscc-specs/issues). 22 | 23 | ## ISCC - Third-Party Implementations 24 | 25 | ### [ISCC-RS](https://github.com/iscc/iscc-rs) 26 | 27 | Rust implementation of the [ISCC specification](https://iscc.codes/specification). 28 | 29 | ### [ISCC-RS-CLI](https://github.com/iscc/iscc-rs-cli) 30 | 31 | Command-line tool based on the [iscc-rs](https://github.com/iscc/iscc-rs) library. 32 | 33 | ### [ISCC-GOLANG](https://github.com/coblo/iscc-golang) 34 | 35 | Golang implementation of the ISCC protocol. 36 | 37 | ### [ISCC-DOTNET](https://github.com/iscc/iscc-dotnet) 38 | 39 | C# .Net Core implementation of the ISCC protocol. 40 | 41 | ## ISCC - Technical Demos & Integrations 42 | 43 | ### [Web Demo](https://iscc.coblo.net/) 44 | 45 | A demo web application that can generate and look up ISCC codes from files or URLs and visualizes differences between ISCC Codes. The [source code](https://github.com/coblo/iscc-demo) is also available. 46 | 47 | ### [Data Streams](https://explorer.coblo.net/streams/) 48 | 49 | The Content Blockchain Testnet is running a public data-stream of ISCC codes for testing and demonstration purposes. The web demo uses the [ISCC data-stream](https://explorer.coblo.net/stream/iscc) for lookups. 50 | 51 | ### [Clink.ID](https://clink.id/) 52 | 53 | [CLink.ID](https://clink.id/) is an interoperable registry, architected to recognize identifiers and metadata regardless of whether they are Handle- or content-based and/or blockchain-inspired. CLink.ID is operated by [CLink Media, Inc.](https://clink.media/) and has integrated [ISCC in its registry](https://clink.id/#objects/20.500.12200.100/5d8e3c3f9d6c6a759261). 
54 | 55 | ### [Smart License Demo](https://smartlicense.coblo.net/) 56 | 57 | Prototype demo of a smart licensing framework that uses ISCC codes for content identification. [Source code](https://github.com/coblo/smartlicense) is also available. 58 | 59 | ### [Blockchain Wallet Demo](https://github.com/coblo/gui-demo) 60 | An early prototype demo of a blockchain wallet that uses ISCC codes for license tokenization. 61 | 62 | ## ISCC - Presentations & Articles 63 | 64 | ### [Blockchain for Science Conference (Berlin, 2019)](https://www.youtube.com/watch?v=4OCvPrDhGuQ) 65 | 66 | ISCC - Similarity hashing for digital content identification in decentralized environments. [Recording](https://www.youtube.com/watch?v=4OCvPrDhGuQ) of the 30-minute talk. 67 | 68 | ## Organizations and Initiatives 69 | 70 | ### [ISCC Foundation](https://iscc.foundation/) 71 | 72 | The **ISCC Foundation** is an independent international **nonprofit organization** that promotes information technologies for the common good. 73 | 74 | In particular, the foundation supports the **ISCC** and promotes the development and adoption of open standards and open source technologies as well as tools and services that enable individuals and organizations to better **create, manage, discover, access, share, and monetize digital content, knowledge, and ideas**. 75 | 76 | ### [ISO - International Organization for Standardization](https://www.iso.org/committee/48836.html) 77 | 78 | **ISO/TC 46/SC 9** (Identification and description) has accepted the **International Standard Content Code** as a preliminary work item and created a new working group (WG 18 - Digital-Content-Based Identification). 
79 | -------------------------------------------------------------------------------- /docs/stylesheets/custom.css: -------------------------------------------------------------------------------- 1 | .md-typeset__table { 2 | min-width: 100%; 3 | } 4 | 5 | dt { 6 | font-weight: bold; 7 | font-size: 1em; 8 | 9 | } 10 | 11 | dd { 12 | margin: 0; 13 | font-size: 0.9em; 14 | padding: 0 0 0.5em 0; 15 | } 16 | 17 | a code { 18 | font-size: 0.8em !important; 19 | background-color: lightgray !important; 20 | padding: 2px 6px 2px 6px !important; 21 | } 22 | 23 | 24 | .right { 25 | float: right; 26 | width: 40%; 27 | margin-left: 10px; 28 | } 29 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | # Project information 2 | site_name: 'ISCC - Content Identifiers' 3 | site_description: 'International Standard Content Code - Specification' 4 | site_author: 'Titusz Pan' 5 | site_url: 'http://iscc.codes' 6 | 7 | # Repository 8 | repo_name: 'iscc/iscc-specs' 9 | repo_url: 'https://github.com/iscc/iscc-specs' 10 | edit_uri: 'edit/master/docs/' 11 | 12 | # Copyright 13 | copyright: 'Copyright © 2016-2020 The Authors 14 | | Privacy Policy 15 | | Cookie Policy 16 | | Imprint 17 | | Disclaimer' 18 | 19 | # Documentation and Theme 20 | theme: 21 | name: 'material' 22 | language: 'en' 23 | logo: 'images/logo-white.svg' 24 | favicon: 'images/favicon.png' 25 | palette: 26 | primary: 'blue' 27 | accent: 'pink' 28 | feature: 29 | tabs: false 30 | 31 | extra_css: 32 | - 'stylesheets/custom.css' 33 | 34 | nav: 35 | - Overview: 'index.md' 36 | - Features: 'features.md' 37 | - Concept: 'concept.md' 38 | - Specification: 'specification.md' 39 | - Resources: 'resources.md' 40 | - Versions: 41 | - Version 1.x: 'specification.md' 42 | - Version 1.0: 'https://github.com/iscc/iscc-specs/blob/version-1.0/docs/specification.md' 43 | - License: 'license.md' 44 | 45 | # Options 
46 | extra: 47 | search: 48 | language: 'en' 49 | social: 50 | - type: 'home' 51 | link: 'https://iscc.codes' 52 | - type: 'github' 53 | link: 'https://github.com/iscc' 54 | - type: 'telegram' 55 | link: 'https://t.me/iscc_dev' 56 | 57 | 58 | markdown_extensions: 59 | - markdown.extensions.abbr 60 | - markdown.extensions.admonition 61 | - markdown.extensions.codehilite: 62 | linenums: false 63 | guess_lang: false 64 | - markdown.extensions.attr_list 65 | - markdown.extensions.def_list 66 | - markdown.extensions.footnotes 67 | - markdown.extensions.meta 68 | - markdown.extensions.toc: 69 | permalink: '#' 70 | - pymdownx.magiclink 71 | 72 | plugins: 73 | - search 74 | - git-revision-date-localized: 75 | type: iso_datetime 76 | - minify: 77 | minify_html: true 78 | - redirects: 79 | redirect_maps: 80 | 'implementations.md': 'resources.md' 81 | 82 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | category = "dev" 3 | description = "Atomic file writes." 
4 | marker = "sys_platform == \"win32\"" 5 | name = "atomicwrites" 6 | optional = false 7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 8 | version = "1.4.0" 9 | 10 | [[package]] 11 | category = "dev" 12 | description = "Classes Without Boilerplate" 13 | name = "attrs" 14 | optional = false 15 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 16 | version = "19.3.0" 17 | 18 | [package.extras] 19 | azure-pipelines = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "pytest-azurepipelines"] 20 | dev = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "sphinx", "pre-commit"] 21 | docs = ["sphinx", "zope.interface"] 22 | tests = ["coverage", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"] 23 | 24 | [[package]] 25 | category = "dev" 26 | description = "Internationalization utilities" 27 | name = "babel" 28 | optional = false 29 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 30 | version = "2.8.0" 31 | 32 | [package.dependencies] 33 | pytz = ">=2015.7" 34 | 35 | [[package]] 36 | category = "main" 37 | description = "Composable command line interface toolkit" 38 | name = "click" 39 | optional = false 40 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 41 | version = "7.1.2" 42 | 43 | [[package]] 44 | category = "dev" 45 | description = "Cross-platform colored terminal text." 
46 | marker = "sys_platform == \"win32\"" 47 | name = "colorama" 48 | optional = false 49 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 50 | version = "0.4.3" 51 | 52 | [[package]] 53 | category = "main" 54 | description = "Clean single-source support for Python 3 and 2" 55 | name = "future" 56 | optional = false 57 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 58 | version = "0.18.2" 59 | 60 | [[package]] 61 | category = "dev" 62 | description = "Git Object Database" 63 | name = "gitdb" 64 | optional = false 65 | python-versions = ">=3.4" 66 | version = "4.0.5" 67 | 68 | [package.dependencies] 69 | smmap = ">=3.0.1,<4" 70 | 71 | [[package]] 72 | category = "dev" 73 | description = "Python Git Library" 74 | name = "gitpython" 75 | optional = false 76 | python-versions = ">=3.4" 77 | version = "3.1.7" 78 | 79 | [package.dependencies] 80 | gitdb = ">=4.0.1,<5" 81 | 82 | [[package]] 83 | category = "dev" 84 | description = "An HTML Minifier" 85 | name = "htmlmin" 86 | optional = false 87 | python-versions = "*" 88 | version = "0.1.12" 89 | 90 | [[package]] 91 | category = "main" 92 | description = "Read metadata from Python packages" 93 | marker = "python_version < \"3.8\"" 94 | name = "importlib-metadata" 95 | optional = false 96 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" 97 | version = "1.7.0" 98 | 99 | [package.dependencies] 100 | zipp = ">=0.5" 101 | 102 | [package.extras] 103 | docs = ["sphinx", "rst.linker"] 104 | testing = ["packaging", "pep517", "importlib-resources (>=1.3)"] 105 | 106 | [[package]] 107 | category = "main" 108 | description = "A very fast and expressive template engine." 
109 | name = "jinja2" 110 | optional = false 111 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 112 | version = "2.11.2" 113 | 114 | [package.dependencies] 115 | MarkupSafe = ">=0.23" 116 | 117 | [package.extras] 118 | i18n = ["Babel (>=0.8)"] 119 | 120 | [[package]] 121 | category = "main" 122 | description = "Lightweight pipelining: using Python functions as pipeline jobs." 123 | marker = "python_version > \"2.7\"" 124 | name = "joblib" 125 | optional = false 126 | python-versions = "*" 127 | version = "0.14.1" 128 | 129 | [[package]] 130 | category = "dev" 131 | description = "JavaScript minifier." 132 | name = "jsmin" 133 | optional = false 134 | python-versions = "*" 135 | version = "2.2.2" 136 | 137 | [[package]] 138 | category = "main" 139 | description = "Python LiveReload is an awesome tool for web developers" 140 | name = "livereload" 141 | optional = false 142 | python-versions = "*" 143 | version = "2.6.2" 144 | 145 | [package.dependencies] 146 | six = "*" 147 | 148 | [package.dependencies.tornado] 149 | python = ">=2.8" 150 | version = "*" 151 | 152 | [[package]] 153 | category = "main" 154 | description = "A Python implementation of Lunr.js" 155 | name = "lunr" 156 | optional = false 157 | python-versions = "*" 158 | version = "0.5.8" 159 | 160 | [package.dependencies] 161 | future = ">=0.16.0" 162 | six = ">=1.11.0" 163 | 164 | [package.dependencies.nltk] 165 | optional = true 166 | python = ">=2.8" 167 | version = ">=3.2.5" 168 | 169 | [package.extras] 170 | languages = ["nltk (>=3.2.5,<3.5)", "nltk (>=3.2.5)"] 171 | 172 | [[package]] 173 | category = "main" 174 | description = "Python implementation of Markdown." 
175 | name = "markdown" 176 | optional = false 177 | python-versions = ">=3.5" 178 | version = "3.2.2" 179 | 180 | [package.dependencies] 181 | [package.dependencies.importlib-metadata] 182 | python = "<3.8" 183 | version = "*" 184 | 185 | [package.extras] 186 | testing = ["coverage", "pyyaml"] 187 | 188 | [[package]] 189 | category = "main" 190 | description = "Safely add untrusted strings to HTML/XML markup." 191 | name = "markupsafe" 192 | optional = false 193 | python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*" 194 | version = "1.1.1" 195 | 196 | [[package]] 197 | category = "main" 198 | description = "Project documentation with Markdown." 199 | name = "mkdocs" 200 | optional = false 201 | python-versions = ">=3.5" 202 | version = "1.1.2" 203 | 204 | [package.dependencies] 205 | Jinja2 = ">=2.10.1" 206 | Markdown = ">=3.2.1" 207 | PyYAML = ">=3.10" 208 | click = ">=3.3" 209 | livereload = ">=2.5.1" 210 | tornado = ">=5.0" 211 | 212 | [package.dependencies.lunr] 213 | extras = ["languages"] 214 | version = "0.5.8" 215 | 216 | [[package]] 217 | category = "dev" 218 | description = "Mkdocs plugin that enables displaying the localized date of the last git modification of a markdown file." 
219 | name = "mkdocs-git-revision-date-localized-plugin" 220 | optional = false 221 | python-versions = ">=3.5" 222 | version = "0.7" 223 | 224 | [package.dependencies] 225 | GitPython = "*" 226 | babel = ">=2.7.0" 227 | mkdocs = ">=1.0" 228 | 229 | [[package]] 230 | category = "dev" 231 | description = "A Material Design theme for MkDocs" 232 | name = "mkdocs-material" 233 | optional = false 234 | python-versions = "*" 235 | version = "4.6.3" 236 | 237 | [package.dependencies] 238 | Pygments = ">=2.4" 239 | markdown = ">=3.2" 240 | mkdocs = ">=1.0" 241 | pymdown-extensions = ">=6.3" 242 | 243 | [[package]] 244 | category = "dev" 245 | description = "An MkDocs plugin to minify HTML and/or JS files prior to being written to disk" 246 | name = "mkdocs-minify-plugin" 247 | optional = false 248 | python-versions = ">=2.7" 249 | version = "0.3.0" 250 | 251 | [package.dependencies] 252 | htmlmin = ">=0.1.4" 253 | jsmin = ">=2.2.2" 254 | mkdocs = ">=1.0.4" 255 | 256 | [[package]] 257 | category = "main" 258 | description = "A MkDocs plugin for dynamic page redirects to prevent broken links." 
259 | name = "mkdocs-redirects" 260 | optional = false 261 | python-versions = ">=2.7" 262 | version = "1.0.1" 263 | 264 | [package.dependencies] 265 | mkdocs = ">=1.0.4" 266 | 267 | [package.extras] 268 | release = ["twine (1.13.0)"] 269 | 270 | [[package]] 271 | category = "dev" 272 | description = "More routines for operating on iterables, beyond itertools" 273 | name = "more-itertools" 274 | optional = false 275 | python-versions = ">=3.5" 276 | version = "8.4.0" 277 | 278 | [[package]] 279 | category = "main" 280 | description = "Natural Language Toolkit" 281 | marker = "python_version > \"2.7\"" 282 | name = "nltk" 283 | optional = false 284 | python-versions = "*" 285 | version = "3.5" 286 | 287 | [package.dependencies] 288 | click = "*" 289 | joblib = "*" 290 | regex = "*" 291 | tqdm = "*" 292 | 293 | [package.extras] 294 | all = ["requests", "numpy", "python-crfsuite", "scikit-learn", "twython", "pyparsing", "scipy", "matplotlib", "gensim"] 295 | corenlp = ["requests"] 296 | machine_learning = ["gensim", "numpy", "python-crfsuite", "scikit-learn", "scipy"] 297 | plot = ["matplotlib"] 298 | tgrep = ["pyparsing"] 299 | twitter = ["twython"] 300 | 301 | [[package]] 302 | category = "dev" 303 | description = "Core utilities for Python packages" 304 | name = "packaging" 305 | optional = false 306 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 307 | version = "20.4" 308 | 309 | [package.dependencies] 310 | pyparsing = ">=2.0.2" 311 | six = "*" 312 | 313 | [[package]] 314 | category = "dev" 315 | description = "Object-oriented filesystem paths" 316 | marker = "python_version < \"3.6\"" 317 | name = "pathlib2" 318 | optional = false 319 | python-versions = "*" 320 | version = "2.3.5" 321 | 322 | [package.dependencies] 323 | six = "*" 324 | 325 | [[package]] 326 | category = "main" 327 | description = "Python Imaging Library (Fork)" 328 | name = "pillow" 329 | optional = false 330 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, 
!=3.4.*" 331 | version = "6.2.2" 332 | 333 | [[package]] 334 | category = "dev" 335 | description = "plugin and hook calling mechanisms for python" 336 | name = "pluggy" 337 | optional = false 338 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 339 | version = "0.13.1" 340 | 341 | [package.dependencies] 342 | [package.dependencies.importlib-metadata] 343 | python = "<3.8" 344 | version = ">=0.12" 345 | 346 | [package.extras] 347 | dev = ["pre-commit", "tox"] 348 | 349 | [[package]] 350 | category = "dev" 351 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 352 | name = "py" 353 | optional = false 354 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 355 | version = "1.9.0" 356 | 357 | [[package]] 358 | category = "dev" 359 | description = "Pygments is a syntax highlighting package written in Python." 360 | name = "pygments" 361 | optional = false 362 | python-versions = ">=3.5" 363 | version = "2.6.1" 364 | 365 | [[package]] 366 | category = "dev" 367 | description = "Extension pack for Python Markdown." 
368 | name = "pymdown-extensions" 369 | optional = false 370 | python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" 371 | version = "7.1" 372 | 373 | [package.dependencies] 374 | Markdown = ">=3.2" 375 | 376 | [[package]] 377 | category = "dev" 378 | description = "Python parsing module" 379 | name = "pyparsing" 380 | optional = false 381 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 382 | version = "2.4.7" 383 | 384 | [[package]] 385 | category = "dev" 386 | description = "pytest: simple powerful testing with Python" 387 | name = "pytest" 388 | optional = false 389 | python-versions = ">=3.5" 390 | version = "5.4.3" 391 | 392 | [package.dependencies] 393 | atomicwrites = ">=1.0" 394 | attrs = ">=17.4.0" 395 | colorama = "*" 396 | more-itertools = ">=4.0.0" 397 | packaging = "*" 398 | pluggy = ">=0.12,<1.0" 399 | py = ">=1.5.0" 400 | wcwidth = "*" 401 | 402 | [package.dependencies.importlib-metadata] 403 | python = "<3.8" 404 | version = ">=0.12" 405 | 406 | [package.dependencies.pathlib2] 407 | python = "<3.6" 408 | version = ">=2.2.0" 409 | 410 | [package.extras] 411 | checkqa-mypy = ["mypy (v0.761)"] 412 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] 413 | 414 | [[package]] 415 | category = "dev" 416 | description = "World timezone definitions, modern and historical" 417 | name = "pytz" 418 | optional = false 419 | python-versions = "*" 420 | version = "2020.1" 421 | 422 | [[package]] 423 | category = "main" 424 | description = "YAML parser and emitter for Python" 425 | name = "pyyaml" 426 | optional = false 427 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 428 | version = "5.3.1" 429 | 430 | [[package]] 431 | category = "main" 432 | description = "Alternative regular expression module, to replace re." 
433 | marker = "python_version > \"2.7\"" 434 | name = "regex" 435 | optional = false 436 | python-versions = "*" 437 | version = "2020.7.14" 438 | 439 | [[package]] 440 | category = "main" 441 | description = "Python 2 and 3 compatibility utilities" 442 | name = "six" 443 | optional = false 444 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" 445 | version = "1.15.0" 446 | 447 | [[package]] 448 | category = "dev" 449 | description = "A pure Python implementation of a sliding window memory map manager" 450 | name = "smmap" 451 | optional = false 452 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 453 | version = "3.0.4" 454 | 455 | [[package]] 456 | category = "main" 457 | description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 458 | name = "tornado" 459 | optional = false 460 | python-versions = ">= 3.5" 461 | version = "6.0.4" 462 | 463 | [[package]] 464 | category = "main" 465 | description = "Fast, Extensible Progress Meter" 466 | marker = "python_version > \"2.7\"" 467 | name = "tqdm" 468 | optional = false 469 | python-versions = ">=2.6, !=3.0.*, !=3.1.*" 470 | version = "4.48.2" 471 | 472 | [package.extras] 473 | dev = ["py-make (>=0.1.0)", "twine", "argopt", "pydoc-markdown"] 474 | 475 | [[package]] 476 | category = "dev" 477 | description = "Measures the displayed width of unicode strings in a terminal" 478 | name = "wcwidth" 479 | optional = false 480 | python-versions = "*" 481 | version = "0.2.5" 482 | 483 | [[package]] 484 | category = "main" 485 | description = "Python binding for xxHash" 486 | name = "xxhash" 487 | optional = false 488 | python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" 489 | version = "1.4.4" 490 | 491 | [[package]] 492 | category = "main" 493 | description = "Backport of pathlib-compatible object wrapper for zip files" 494 | marker = "python_version < \"3.8\"" 495 | name = "zipp" 496 | optional = false 497 | python-versions = ">=2.7" 498 | 
version = "1.2.0" 499 | 500 | [package.extras] 501 | docs = ["sphinx", "jaraco.packaging (>=3.2)", "rst.linker (>=1.9)"] 502 | testing = ["pathlib2", "unittest2", "jaraco.itertools", "func-timeout"] 503 | 504 | [metadata] 505 | content-hash = "4be7ef11425f598676338c792ba4824223fbf0cb4286c3fc902bb031ad06bf7d" 506 | lock-version = "1.0" 507 | python-versions = "^3.5" 508 | 509 | [metadata.files] 510 | atomicwrites = [ 511 | {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, 512 | {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, 513 | ] 514 | attrs = [ 515 | {file = "attrs-19.3.0-py2.py3-none-any.whl", hash = "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c"}, 516 | {file = "attrs-19.3.0.tar.gz", hash = "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72"}, 517 | ] 518 | babel = [ 519 | {file = "Babel-2.8.0-py2.py3-none-any.whl", hash = "sha256:d670ea0b10f8b723672d3a6abeb87b565b244da220d76b4dba1b66269ec152d4"}, 520 | {file = "Babel-2.8.0.tar.gz", hash = "sha256:1aac2ae2d0d8ea368fa90906567f5c08463d98ade155c0c4bfedd6a0f7160e38"}, 521 | ] 522 | click = [ 523 | {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, 524 | {file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"}, 525 | ] 526 | colorama = [ 527 | {file = "colorama-0.4.3-py2.py3-none-any.whl", hash = "sha256:7d73d2a99753107a36ac6b455ee49046802e59d9d076ef8e47b61499fa29afff"}, 528 | {file = "colorama-0.4.3.tar.gz", hash = "sha256:e96da0d330793e2cb9485e9ddfd918d456036c7149416295932478192f4436a1"}, 529 | ] 530 | future = [ 531 | {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, 532 | ] 533 | gitdb = [ 534 | {file = 
"gitdb-4.0.5-py3-none-any.whl", hash = "sha256:91f36bfb1ab7949b3b40e23736db18231bf7593edada2ba5c3a174a7b23657ac"}, 535 | {file = "gitdb-4.0.5.tar.gz", hash = "sha256:c9e1f2d0db7ddb9a704c2a0217be31214e91a4fe1dea1efad19ae42ba0c285c9"}, 536 | ] 537 | gitpython = [ 538 | {file = "GitPython-3.1.7-py3-none-any.whl", hash = "sha256:fa3b92da728a457dd75d62bb5f3eb2816d99a7fe6c67398e260637a40e3fafb5"}, 539 | {file = "GitPython-3.1.7.tar.gz", hash = "sha256:2db287d71a284e22e5c2846042d0602465c7434d910406990d5b74df4afb0858"}, 540 | ] 541 | htmlmin = [ 542 | {file = "htmlmin-0.1.12.tar.gz", hash = "sha256:50c1ef4630374a5d723900096a961cff426dff46b48f34d194a81bbe14eca178"}, 543 | ] 544 | importlib-metadata = [ 545 | {file = "importlib_metadata-1.7.0-py2.py3-none-any.whl", hash = "sha256:dc15b2969b4ce36305c51eebe62d418ac7791e9a157911d58bfb1f9ccd8e2070"}, 546 | {file = "importlib_metadata-1.7.0.tar.gz", hash = "sha256:90bb658cdbbf6d1735b6341ce708fc7024a3e14e99ffdc5783edea9f9b077f83"}, 547 | ] 548 | jinja2 = [ 549 | {file = "Jinja2-2.11.2-py2.py3-none-any.whl", hash = "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035"}, 550 | {file = "Jinja2-2.11.2.tar.gz", hash = "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0"}, 551 | ] 552 | joblib = [ 553 | {file = "joblib-0.14.1-py2.py3-none-any.whl", hash = "sha256:bdb4fd9b72915ffb49fde2229ce482dd7ae79d842ed8c2b4c932441495af1403"}, 554 | {file = "joblib-0.14.1.tar.gz", hash = "sha256:0630eea4f5664c463f23fbf5dcfc54a2bc6168902719fa8e19daf033022786c8"}, 555 | ] 556 | jsmin = [ 557 | {file = "jsmin-2.2.2.tar.gz", hash = "sha256:b6df99b2cd1c75d9d342e4335b535789b8da9107ec748212706ef7bbe5c2553b"}, 558 | ] 559 | livereload = [ 560 | {file = "livereload-2.6.2.tar.gz", hash = "sha256:d1eddcb5c5eb8d2ca1fa1f750e580da624c0f7fcb734aa5780dc81b7dcbd89be"}, 561 | ] 562 | lunr = [ 563 | {file = "lunr-0.5.8-py2.py3-none-any.whl", hash = "sha256:aab3f489c4d4fab4c1294a257a30fec397db56f0a50273218ccc3efdbf01d6ca"}, 564 | 
{file = "lunr-0.5.8.tar.gz", hash = "sha256:c4fb063b98eff775dd638b3df380008ae85e6cb1d1a24d1cd81a10ef6391c26e"}, 565 | ] 566 | markdown = [ 567 | {file = "Markdown-3.2.2-py3-none-any.whl", hash = "sha256:c467cd6233885534bf0fe96e62e3cf46cfc1605112356c4f9981512b8174de59"}, 568 | {file = "Markdown-3.2.2.tar.gz", hash = "sha256:1fafe3f1ecabfb514a5285fca634a53c1b32a81cb0feb154264d55bf2ff22c17"}, 569 | ] 570 | markupsafe = [ 571 | {file = "MarkupSafe-1.1.1-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161"}, 572 | {file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7"}, 573 | {file = "MarkupSafe-1.1.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183"}, 574 | {file = "MarkupSafe-1.1.1-cp27-cp27m-win32.whl", hash = "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b"}, 575 | {file = "MarkupSafe-1.1.1-cp27-cp27m-win_amd64.whl", hash = "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e"}, 576 | {file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f"}, 577 | {file = "MarkupSafe-1.1.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1"}, 578 | {file = "MarkupSafe-1.1.1-cp34-cp34m-macosx_10_6_intel.whl", hash = "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5"}, 579 | {file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1"}, 580 | {file = "MarkupSafe-1.1.1-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735"}, 581 | {file = "MarkupSafe-1.1.1-cp34-cp34m-win32.whl", hash = 
"sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21"}, 582 | {file = "MarkupSafe-1.1.1-cp34-cp34m-win_amd64.whl", hash = "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235"}, 583 | {file = "MarkupSafe-1.1.1-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b"}, 584 | {file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f"}, 585 | {file = "MarkupSafe-1.1.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905"}, 586 | {file = "MarkupSafe-1.1.1-cp35-cp35m-win32.whl", hash = "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1"}, 587 | {file = "MarkupSafe-1.1.1-cp35-cp35m-win_amd64.whl", hash = "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d"}, 588 | {file = "MarkupSafe-1.1.1-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff"}, 589 | {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473"}, 590 | {file = "MarkupSafe-1.1.1-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e"}, 591 | {file = "MarkupSafe-1.1.1-cp36-cp36m-win32.whl", hash = "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66"}, 592 | {file = "MarkupSafe-1.1.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5"}, 593 | {file = "MarkupSafe-1.1.1-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d"}, 594 | {file = "MarkupSafe-1.1.1-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e"}, 595 | {file = 
"MarkupSafe-1.1.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6"}, 596 | {file = "MarkupSafe-1.1.1-cp37-cp37m-win32.whl", hash = "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2"}, 597 | {file = "MarkupSafe-1.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c"}, 598 | {file = "MarkupSafe-1.1.1.tar.gz", hash = "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b"}, 599 | ] 600 | mkdocs = [ 601 | {file = "mkdocs-1.1.2-py3-none-any.whl", hash = "sha256:096f52ff52c02c7e90332d2e53da862fde5c062086e1b5356a6e392d5d60f5e9"}, 602 | {file = "mkdocs-1.1.2.tar.gz", hash = "sha256:f0b61e5402b99d7789efa032c7a74c90a20220a9c81749da06dbfbcbd52ffb39"}, 603 | ] 604 | mkdocs-git-revision-date-localized-plugin = [ 605 | {file = "mkdocs-git-revision-date-localized-plugin-0.7.tar.gz", hash = "sha256:2ddfd2e390c512d9f8fcd01746fb6516985f6f6f22933789168775b4abc6d08c"}, 606 | {file = "mkdocs_git_revision_date_localized_plugin-0.7-py3-none-any.whl", hash = "sha256:3e40126fc5d48c9d3af0f1070dbb1c781239b716b7e217a961b1eef1bffd123f"}, 607 | ] 608 | mkdocs-material = [ 609 | {file = "mkdocs-material-4.6.3.tar.gz", hash = "sha256:1d486635b03f5a2ec87325842f7b10c7ae7daa0eef76b185572eece6a6ea212c"}, 610 | {file = "mkdocs_material-4.6.3-py2.py3-none-any.whl", hash = "sha256:7f3afa0a09c07d0b89a6a9755fdb00513aee8f0cec3538bb903325c80f66f444"}, 611 | ] 612 | mkdocs-minify-plugin = [ 613 | {file = "mkdocs-minify-plugin-0.3.0.tar.gz", hash = "sha256:06fecd8ddb9cb90f30bcee2d94c3d4b46a090f403d7ff0edff089a435906c4ee"}, 614 | {file = "mkdocs_minify_plugin-0.3.0-py2-none-any.whl", hash = "sha256:9bac96276b1681debb3eb2cf5bae972586b4c1138e8d78ea63a984ea6276563d"}, 615 | ] 616 | mkdocs-redirects = [ 617 | {file = "mkdocs-redirects-1.0.1.tar.gz", hash = "sha256:0a21301bee7f743d622226a6869f4718f3bfb26887985e23aafac7f7f6b050aa"}, 618 | ] 619 | 
more-itertools = [ 620 | {file = "more-itertools-8.4.0.tar.gz", hash = "sha256:68c70cc7167bdf5c7c9d8f6954a7837089c6a36bf565383919bb595efb8a17e5"}, 621 | {file = "more_itertools-8.4.0-py3-none-any.whl", hash = "sha256:b78134b2063dd214000685165d81c154522c3ee0a1c0d4d113c80361c234c5a2"}, 622 | ] 623 | nltk = [ 624 | {file = "nltk-3.5.zip", hash = "sha256:845365449cd8c5f9731f7cb9f8bd6fd0767553b9d53af9eb1b3abf7700936b35"}, 625 | ] 626 | packaging = [ 627 | {file = "packaging-20.4-py2.py3-none-any.whl", hash = "sha256:998416ba6962ae7fbd6596850b80e17859a5753ba17c32284f67bfff33784181"}, 628 | {file = "packaging-20.4.tar.gz", hash = "sha256:4357f74f47b9c12db93624a82154e9b120fa8293699949152b22065d556079f8"}, 629 | ] 630 | pathlib2 = [ 631 | {file = "pathlib2-2.3.5-py2.py3-none-any.whl", hash = "sha256:0ec8205a157c80d7acc301c0b18fbd5d44fe655968f5d947b6ecef5290fc35db"}, 632 | {file = "pathlib2-2.3.5.tar.gz", hash = "sha256:6cd9a47b597b37cc57de1c05e56fb1a1c9cc9fab04fe78c29acd090418529868"}, 633 | ] 634 | pillow = [ 635 | {file = "Pillow-6.2.2-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:834dd023b7f987d6b700ad93dc818098d7eb046bd445e9992b3093c6f9d7a95f"}, 636 | {file = "Pillow-6.2.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:d3a98444a00b4643b22b0685dbf9e0ddcaf4ebfd4ea23f84f228adf5a0765bb2"}, 637 | {file = "Pillow-6.2.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:2b4a94be53dff02af90760c10a2e3634c3c7703410f38c98154d5ce71fe63d20"}, 638 | {file = "Pillow-6.2.2-cp27-cp27m-win32.whl", hash = "sha256:87ef0eca169f7f0bc050b22f05c7e174a65c36d584428431e802c0165c5856ea"}, 639 | {file = "Pillow-6.2.2-cp27-cp27m-win_amd64.whl", hash = "sha256:cbd5647097dc55e501f459dbac7f1d0402225636deeb9e0a98a8d2df649fc19d"}, 640 | {file = "Pillow-6.2.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:4adc3302df4faf77c63ab3a83e1a3e34b94a6a992084f4aa1cb236d1deaf4b39"}, 641 | {file = "Pillow-6.2.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = 
"sha256:e3a797a079ce289e59dbd7eac9ca3bf682d52687f718686857281475b7ca8e6a"}, 642 | {file = "Pillow-6.2.2-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:bb7861e4618a0c06c40a2e509c1bea207eea5fd4320d486e314e00745a402ca5"}, 643 | {file = "Pillow-6.2.2-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:535e8e0e02c9f1fc2e307256149d6ee8ad3aa9a6e24144b7b6e6fb6126cb0e99"}, 644 | {file = "Pillow-6.2.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:bc149dab804291a18e1186536519e5e122a2ac1316cb80f506e855a500b1cdd4"}, 645 | {file = "Pillow-6.2.2-cp35-cp35m-win32.whl", hash = "sha256:1a3bc8e1db5af40a81535a62a591fafdb30a8a1b319798ea8052aa65ef8f06d2"}, 646 | {file = "Pillow-6.2.2-cp35-cp35m-win_amd64.whl", hash = "sha256:d6b4dc325170bee04ca8292bbd556c6f5398d52c6149ca881e67daf62215426f"}, 647 | {file = "Pillow-6.2.2-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:43ef1cff7ee57f9c8c8e6fa02a62eae9fa23a7e34418c7ce88c0e3fe09d1fb38"}, 648 | {file = "Pillow-6.2.2-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:900de1fdc93764be13f6b39dc0dd0207d9ff441d87ad7c6e97e49b81987dc0f3"}, 649 | {file = "Pillow-6.2.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:92b83b380f9181cacc994f4c983d95a9c8b00b50bf786c66d235716b526a3332"}, 650 | {file = "Pillow-6.2.2-cp36-cp36m-win32.whl", hash = "sha256:00e0bbe9923adc5cc38a8da7d87d4ce16cde53b8d3bba8886cb928e84522d963"}, 651 | {file = "Pillow-6.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:5ccfcb0a34ad9b77ad247c231edb781763198f405a5c8dc1b642449af821fb7f"}, 652 | {file = "Pillow-6.2.2-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:5dcbbaa3a24d091a64560d3c439a8962866a79a033d40eb1a75f1b3413bfc2bc"}, 653 | {file = "Pillow-6.2.2-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:6e2a7e74d1a626b817ecb7a28c433b471a395c010b2a1f511f976e9ea4363e64"}, 654 | {file = "Pillow-6.2.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:c424d35a5259be559b64490d0fd9e03fba81f1ce8e5b66e0a59de97547351d80"}, 655 | {file = "Pillow-6.2.2-cp37-cp37m-win32.whl", hash = 
"sha256:aa4792ab056f51b49e7d59ce5733155e10a918baf8ce50f64405db23d5627fa2"}, 656 | {file = "Pillow-6.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:0d5c99f80068f13231ac206bd9b2e80ea357f5cf9ae0fa97fab21e32d5b61065"}, 657 | {file = "Pillow-6.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:03457e439d073770d88afdd90318382084732a5b98b0eb6f49454746dbaae701"}, 658 | {file = "Pillow-6.2.2-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ccf16fe444cc43800eeacd4f4769971200982200a71b1368f49410d0eb769543"}, 659 | {file = "Pillow-6.2.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:b72c39585f1837d946bd1a829a4820ccf86e361f28cbf60f5d646f06318b61e2"}, 660 | {file = "Pillow-6.2.2-cp38-cp38-win32.whl", hash = "sha256:3ba7d8f1d962780f86aa747fef0baf3211b80cb13310fff0c375da879c0656d4"}, 661 | {file = "Pillow-6.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:3e81485cec47c24f5fb27acb485a4fc97376b2b332ed633867dc68ac3077998c"}, 662 | {file = "Pillow-6.2.2-pp273-pypy_73-win32.whl", hash = "sha256:aa1b0297e352007ec781a33f026afbb062a9a9895bb103c8f49af434b1666880"}, 663 | {file = "Pillow-6.2.2-pp373-pypy36_pp73-win32.whl", hash = "sha256:82859575005408af81b3e9171ae326ff56a69af5439d3fc20e8cb76cd51c8246"}, 664 | {file = "Pillow-6.2.2.tar.gz", hash = "sha256:db9ff0c251ed066d367f53b64827cc9e18ccea001b986d08c265e53625dab950"}, 665 | ] 666 | pluggy = [ 667 | {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, 668 | {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, 669 | ] 670 | py = [ 671 | {file = "py-1.9.0-py2.py3-none-any.whl", hash = "sha256:366389d1db726cd2fcfc79732e75410e5fe4d31db13692115529d34069a043c2"}, 672 | {file = "py-1.9.0.tar.gz", hash = "sha256:9ca6883ce56b4e8da7e79ac18787889fa5206c79dcc67fb065376cd2fe03f342"}, 673 | ] 674 | pygments = [ 675 | {file = "Pygments-2.6.1-py3-none-any.whl", hash = 
"sha256:ff7a40b4860b727ab48fad6360eb351cc1b33cbf9b15a0f689ca5353e9463324"}, 676 | {file = "Pygments-2.6.1.tar.gz", hash = "sha256:647344a061c249a3b74e230c739f434d7ea4d8b1d5f3721bc0f3558049b38f44"}, 677 | ] 678 | pymdown-extensions = [ 679 | {file = "pymdown-extensions-7.1.tar.gz", hash = "sha256:5bf93d1ccd8281948cd7c559eb363e59b179b5373478e8a7195cf4b78e3c11b6"}, 680 | {file = "pymdown_extensions-7.1-py2.py3-none-any.whl", hash = "sha256:8f415b21ee86d80bb2c3676f4478b274d0a8ccb13af672a4c86b9ffd22bd005c"}, 681 | ] 682 | pyparsing = [ 683 | {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, 684 | {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, 685 | ] 686 | pytest = [ 687 | {file = "pytest-5.4.3-py3-none-any.whl", hash = "sha256:5c0db86b698e8f170ba4582a492248919255fcd4c79b1ee64ace34301fb589a1"}, 688 | {file = "pytest-5.4.3.tar.gz", hash = "sha256:7979331bfcba207414f5e1263b5a0f8f521d0f457318836a7355531ed1a4c7d8"}, 689 | ] 690 | pytz = [ 691 | {file = "pytz-2020.1-py2.py3-none-any.whl", hash = "sha256:a494d53b6d39c3c6e44c3bec237336e14305e4f29bbf800b599253057fbb79ed"}, 692 | {file = "pytz-2020.1.tar.gz", hash = "sha256:c35965d010ce31b23eeb663ed3cc8c906275d6be1a34393a1d73a41febf4a048"}, 693 | ] 694 | pyyaml = [ 695 | {file = "PyYAML-5.3.1-cp27-cp27m-win32.whl", hash = "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f"}, 696 | {file = "PyYAML-5.3.1-cp27-cp27m-win_amd64.whl", hash = "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76"}, 697 | {file = "PyYAML-5.3.1-cp35-cp35m-win32.whl", hash = "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2"}, 698 | {file = "PyYAML-5.3.1-cp35-cp35m-win_amd64.whl", hash = "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c"}, 699 | {file = "PyYAML-5.3.1-cp36-cp36m-win32.whl", hash = 
"sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2"}, 700 | {file = "PyYAML-5.3.1-cp36-cp36m-win_amd64.whl", hash = "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648"}, 701 | {file = "PyYAML-5.3.1-cp37-cp37m-win32.whl", hash = "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"}, 702 | {file = "PyYAML-5.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf"}, 703 | {file = "PyYAML-5.3.1-cp38-cp38-win32.whl", hash = "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97"}, 704 | {file = "PyYAML-5.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee"}, 705 | {file = "PyYAML-5.3.1.tar.gz", hash = "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d"}, 706 | ] 707 | regex = [ 708 | {file = "regex-2020.7.14-cp27-cp27m-win32.whl", hash = "sha256:e46d13f38cfcbb79bfdb2964b0fe12561fe633caf964a77a5f8d4e45fe5d2ef7"}, 709 | {file = "regex-2020.7.14-cp27-cp27m-win_amd64.whl", hash = "sha256:6961548bba529cac7c07af2fd4d527c5b91bb8fe18995fed6044ac22b3d14644"}, 710 | {file = "regex-2020.7.14-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c50a724d136ec10d920661f1442e4a8b010a4fe5aebd65e0c2241ea41dbe93dc"}, 711 | {file = "regex-2020.7.14-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:8a51f2c6d1f884e98846a0a9021ff6861bdb98457879f412fdc2b42d14494067"}, 712 | {file = "regex-2020.7.14-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:9c568495e35599625f7b999774e29e8d6b01a6fb684d77dee1f56d41b11b40cd"}, 713 | {file = "regex-2020.7.14-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:51178c738d559a2d1071ce0b0f56e57eb315bcf8f7d4cf127674b533e3101f88"}, 714 | {file = "regex-2020.7.14-cp36-cp36m-win32.whl", hash = "sha256:9eddaafb3c48e0900690c1727fba226c4804b8e6127ea409689c3bb492d06de4"}, 715 | {file = "regex-2020.7.14-cp36-cp36m-win_amd64.whl", hash = 
"sha256:14a53646369157baa0499513f96091eb70382eb50b2c82393d17d7ec81b7b85f"}, 716 | {file = "regex-2020.7.14-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:1269fef3167bb52631ad4fa7dd27bf635d5a0790b8e6222065d42e91bede4162"}, 717 | {file = "regex-2020.7.14-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:d0a5095d52b90ff38592bbdc2644f17c6d495762edf47d876049cfd2968fbccf"}, 718 | {file = "regex-2020.7.14-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:4c037fd14c5f4e308b8370b447b469ca10e69427966527edcab07f52d88388f7"}, 719 | {file = "regex-2020.7.14-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:bc3d98f621898b4a9bc7fecc00513eec8f40b5b83913d74ccb445f037d58cd89"}, 720 | {file = "regex-2020.7.14-cp37-cp37m-win32.whl", hash = "sha256:46bac5ca10fb748d6c55843a931855e2727a7a22584f302dd9bb1506e69f83f6"}, 721 | {file = "regex-2020.7.14-cp37-cp37m-win_amd64.whl", hash = "sha256:0dc64ee3f33cd7899f79a8d788abfbec168410be356ed9bd30bbd3f0a23a7204"}, 722 | {file = "regex-2020.7.14-cp38-cp38-manylinux1_i686.whl", hash = "sha256:5ea81ea3dbd6767873c611687141ec7b06ed8bab43f68fad5b7be184a920dc99"}, 723 | {file = "regex-2020.7.14-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:bbb332d45b32df41200380fff14712cb6093b61bd142272a10b16778c418e98e"}, 724 | {file = "regex-2020.7.14-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:c11d6033115dc4887c456565303f540c44197f4fc1a2bfb192224a301534888e"}, 725 | {file = "regex-2020.7.14-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:75aaa27aa521a182824d89e5ab0a1d16ca207318a6b65042b046053cfc8ed07a"}, 726 | {file = "regex-2020.7.14-cp38-cp38-win32.whl", hash = "sha256:d6cff2276e502b86a25fd10c2a96973fdb45c7a977dca2138d661417f3728341"}, 727 | {file = "regex-2020.7.14-cp38-cp38-win_amd64.whl", hash = "sha256:7a2dd66d2d4df34fa82c9dc85657c5e019b87932019947faece7983f2089a840"}, 728 | {file = "regex-2020.7.14.tar.gz", hash = "sha256:3a3af27a8d23143c49a3420efe5b3f8cf1a48c6fc8bc6856b03f638abc1833bb"}, 729 | ] 730 | six = [ 731 | {file = 
"six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"}, 732 | {file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"}, 733 | ] 734 | smmap = [ 735 | {file = "smmap-3.0.4-py2.py3-none-any.whl", hash = "sha256:54c44c197c819d5ef1991799a7e30b662d1e520f2ac75c9efbeb54a742214cf4"}, 736 | {file = "smmap-3.0.4.tar.gz", hash = "sha256:9c98bbd1f9786d22f14b3d4126894d56befb835ec90cef151af566c7e19b5d24"}, 737 | ] 738 | tornado = [ 739 | {file = "tornado-6.0.4-cp35-cp35m-win32.whl", hash = "sha256:5217e601700f24e966ddab689f90b7ea4bd91ff3357c3600fa1045e26d68e55d"}, 740 | {file = "tornado-6.0.4-cp35-cp35m-win_amd64.whl", hash = "sha256:c98232a3ac391f5faea6821b53db8db461157baa788f5d6222a193e9456e1740"}, 741 | {file = "tornado-6.0.4-cp36-cp36m-win32.whl", hash = "sha256:5f6a07e62e799be5d2330e68d808c8ac41d4a259b9cea61da4101b83cb5dc673"}, 742 | {file = "tornado-6.0.4-cp36-cp36m-win_amd64.whl", hash = "sha256:c952975c8ba74f546ae6de2e226ab3cc3cc11ae47baf607459a6728585bb542a"}, 743 | {file = "tornado-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:2c027eb2a393d964b22b5c154d1a23a5f8727db6fda837118a776b29e2b8ebc6"}, 744 | {file = "tornado-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:5618f72e947533832cbc3dec54e1dffc1747a5cb17d1fd91577ed14fa0dc081b"}, 745 | {file = "tornado-6.0.4-cp38-cp38-win32.whl", hash = "sha256:22aed82c2ea340c3771e3babc5ef220272f6fd06b5108a53b4976d0d722bcd52"}, 746 | {file = "tornado-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:c58d56003daf1b616336781b26d184023ea4af13ae143d9dda65e31e534940b9"}, 747 | {file = "tornado-6.0.4.tar.gz", hash = "sha256:0fe2d45ba43b00a41cd73f8be321a44936dc1aba233dee979f17a042b83eb6dc"}, 748 | ] 749 | tqdm = [ 750 | {file = "tqdm-4.48.2-py2.py3-none-any.whl", hash = "sha256:1a336d2b829be50e46b84668691e0a2719f26c97c62846298dd5ae2937e4d5cf"}, 751 | {file = "tqdm-4.48.2.tar.gz", hash = 
"sha256:564d632ea2b9cb52979f7956e093e831c28d441c11751682f84c86fc46e4fd21"}, 752 | ] 753 | wcwidth = [ 754 | {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, 755 | {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, 756 | ] 757 | xxhash = [ 758 | {file = "xxhash-1.4.4-cp27-cp27m-macosx_10_6_intel.whl", hash = "sha256:cbd52ee825981a4af0c4136b8daa3586576461d42968e3d175eeaaba61fff45e"}, 759 | {file = "xxhash-1.4.4-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:a59339e3c0fb90dbdfb160647e93c951fa7bc5e18a7b57e6f225a78053897f13"}, 760 | {file = "xxhash-1.4.4-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:1dabed0e0e45178247ea797bea84bb5e2b988d8e10e48ad191af63a3ee699790"}, 761 | {file = "xxhash-1.4.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:a707f6eef957fcc6305d0e9d2fa59ce38e2a8f815d22a0edfeb3806bf587d4cf"}, 762 | {file = "xxhash-1.4.4-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:6f861e6c2ce6df10f575d4ae54f7328b8cc1ed33ac8612bdcd188ca0e91646dc"}, 763 | {file = "xxhash-1.4.4-cp27-cp27m-win32.whl", hash = "sha256:39ddc2210159695275f47e816b04ae2ed343d379bffb4b012278fe1eade4d47e"}, 764 | {file = "xxhash-1.4.4-cp27-cp27m-win_amd64.whl", hash = "sha256:5ecb316e63b6e16d22b578c699829bcf0fbaf149365f9753733cb9a53ebc85c6"}, 765 | {file = "xxhash-1.4.4-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f2e24063187e1d87e8ffedbd941f136a627bc18a801234595b65028bd76f28f4"}, 766 | {file = "xxhash-1.4.4-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:5098f5550a6a1ed4b41ecf0ca613b23c10647feba99dfb6c75dfb59baa41a7b2"}, 767 | {file = "xxhash-1.4.4-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:7fb6c82ac4c4ddef7b142e7f59e00b02449e4749a58659e920132e16b51ca899"}, 768 | {file = "xxhash-1.4.4-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:19ebe273b0f50e4bfb55d30a59c8f87a3c8160aa3b42d075c2cce806428eef07"}, 769 | {file = 
"xxhash-1.4.4-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:4a9a436c9ce2e8a59133a7b95ec92c88618fb8afdf9c2ad64c4cdb4ad17acb32"}, 770 | {file = "xxhash-1.4.4-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:7e9f12ee22e09cb25ccdec4658f7fb576931c310c7afa98911ab6254e1a0b3ea"}, 771 | {file = "xxhash-1.4.4-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:ae9e6dabef8a8e25a6473d49078e453cb3da449bd27b2bbbe2d99821b8656498"}, 772 | {file = "xxhash-1.4.4-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:4dc2bbf722b05c7e6cc135a46044eebd58c123b7c7f06fef2bd673bdd810c2ff"}, 773 | {file = "xxhash-1.4.4-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:c6fe4008526a8fa833ef89fb2eea8073c06aaea1e2fba3c6356db5d23ab9bb05"}, 774 | {file = "xxhash-1.4.4-cp35-cp35m-manylinux2014_aarch64.whl", hash = "sha256:69544fc19c42d8436274fc8f2e9fe43a8792585e6b9659f2b4e2d64a6803c29c"}, 775 | {file = "xxhash-1.4.4-cp35-cp35m-win32.whl", hash = "sha256:44d2e46d3a67d00587f8b41cfc816bcc3bf29f80f7f584958ee95c49bbdb2da1"}, 776 | {file = "xxhash-1.4.4-cp35-cp35m-win_amd64.whl", hash = "sha256:b6f6d54a07e45034eab74e8986b7b5adeb883440217e73f8697c5b3eb1e78586"}, 777 | {file = "xxhash-1.4.4-cp36-cp36m-macosx_10_6_intel.whl", hash = "sha256:6ddaff69778872d8ea378e7f62279da36233b4d0099aa9de4ced1d6e06381270"}, 778 | {file = "xxhash-1.4.4-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:a3c0ec5a448e9007b4360f91ef0c443734660ffa92b5c3bcd86139ac16f7666d"}, 779 | {file = "xxhash-1.4.4-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:b78d9f9f2f56698b5111a9b3e7b1467821b17f6d4e7c1002f270ad221fddcaac"}, 780 | {file = "xxhash-1.4.4-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:0ca2e6a729ef3ce66759aa5259171dce94426c39b572f3814734b52d5005b1c6"}, 781 | {file = "xxhash-1.4.4-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:3b4e8f7ff5da774f8016cc5e47bd019e5c01b3734e4fd23b3ec3016fc33d4842"}, 782 | {file = "xxhash-1.4.4-cp36-cp36m-manylinux2014_aarch64.whl", hash = 
"sha256:aa888c1d38810bd4be24698588244327951ef04e7958f01ab6a7f23b7e8432e8"}, 783 | {file = "xxhash-1.4.4-cp36-cp36m-win32.whl", hash = "sha256:0de8d7159d92d7289c8ed008027177dd2559d689b0a4041eee1100a0c6ddcd10"}, 784 | {file = "xxhash-1.4.4-cp36-cp36m-win_amd64.whl", hash = "sha256:af16d7a474e6bb2a20822b314190667005899d53bd7358e1d52eed5167d82efc"}, 785 | {file = "xxhash-1.4.4-cp37-cp37m-macosx_10_6_intel.whl", hash = "sha256:63669092361961b86658c849c03c3231a2afe765a7842348a61996bd47d3087a"}, 786 | {file = "xxhash-1.4.4-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2594670a5b95e71fd4726ccdb449c64ccf56d73197267229c70593ab7b01d7b5"}, 787 | {file = "xxhash-1.4.4-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:0bada61df83fd308ce2876a15843b599df057d3e4a1927328dfcac2908375ccd"}, 788 | {file = "xxhash-1.4.4-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:55e564fc0cb088cbb642062037c0ca432b8243f38d562162d186cf6959cfc4c8"}, 789 | {file = "xxhash-1.4.4-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:09118cea99cfe1499a62c367ac529c8dc90571051eaa137b0464a58a4bf5f65d"}, 790 | {file = "xxhash-1.4.4-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:8d2497bf52cb5c128404c95c0d2d82090162cc372a3c49dc9fee91f4c597821f"}, 791 | {file = "xxhash-1.4.4-cp37-cp37m-win32.whl", hash = "sha256:6aff4aabd5a8832366f79dc9f4001116a4e286e9f7cfc8d26109e1dff0a3f9e1"}, 792 | {file = "xxhash-1.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:a33c540761fbef559cd589bf83b9326f1147132ada813f8f241f75b99b8aadbc"}, 793 | {file = "xxhash-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5e7dedd887e8b0b5e79062c0904185841bd334fe30462a65b0456a020940d55"}, 794 | {file = "xxhash-1.4.4-cp38-cp38-manylinux1_i686.whl", hash = "sha256:de2bef16c9aa2c01d7d700f7c530c3e7aa6262ff5594d2bf79eb8d7973d08cf3"}, 795 | {file = "xxhash-1.4.4-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:1b923d381abfd750ae89b8cf50cb374dd83f9373f70af3fb08065fd88bc0631e"}, 796 | {file = "xxhash-1.4.4-cp38-cp38-manylinux2010_i686.whl", 
hash = "sha256:fb5bfc1961eb546c2a43311084da46b89b647e66a108018b81548622402bd291"}, 797 | {file = "xxhash-1.4.4-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:394ab8316f7fb610ee2b844611daacf72b95b944ec37b627a2ce198400cb1b99"}, 798 | {file = "xxhash-1.4.4-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b4341623c859919100d7dde47eaf452a11b10cc35ebe489029f521be16819367"}, 799 | {file = "xxhash-1.4.4-cp38-cp38-win32.whl", hash = "sha256:407bb4180fc808c0a5aa67b980ae265c304da4a7ac114a3572772e39ba9d5c9f"}, 800 | {file = "xxhash-1.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:c757e0bd2dc775322b180bc29eead0d2a845a80079eb2153fc97afe322353bd7"}, 801 | {file = "xxhash-1.4.4-pp27-pypy_73-manylinux1_x86_64.whl", hash = "sha256:1697f30ab1766366efbdc1bebc20be0c737014c51b7617b08a649b076957b5ee"}, 802 | {file = "xxhash-1.4.4-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:553c0db59468290aaad4e3f4964f5e42fe61abd24ffe81529f87fc4e215f72ba"}, 803 | {file = "xxhash-1.4.4-pp36-pypy36_pp73-manylinux1_x86_64.whl", hash = "sha256:26ff6c985b67771fa36c2417d8e834194d9f09574fcbaf1c2ca8ad9ebc57f687"}, 804 | {file = "xxhash-1.4.4-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:ffec78440eb424eb95a92283e7193d3b75aa27734196a38fadd36d9d8226d6a7"}, 805 | {file = "xxhash-1.4.4.tar.gz", hash = "sha256:7d6df9d217977d085b8abd74b61efa40405ac416f2d8bdacc40826bd5cb1b746"}, 806 | ] 807 | zipp = [ 808 | {file = "zipp-1.2.0-py2.py3-none-any.whl", hash = "sha256:e0d9e63797e483a30d27e09fffd308c59a700d365ec34e93cc100844168bf921"}, 809 | {file = "zipp-1.2.0.tar.gz", hash = "sha256:c70410551488251b0fee67b460fb9a536af8d6f9f008ad10ac51f615b6a521b1"}, 810 | ] 811 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "iscc" 3 | version = "1.0.5" 4 | description = "ISCC: Reference Implementation" 5 | authors = ["Titusz Pan "] 6 | license="BSD 
2-clause" 7 | readme = "README.md" 8 | homepage = "https://iscc.codes/" 9 | repository = "https://github.com/iscc/iscc-specs" 10 | keywords=["iscc", "identifier", "media", "content", "hash", "blockchain", "similarity"] 11 | classifiers=[ 12 | "Development Status :: 4 - Beta", 13 | "Intended Audience :: Developers", 14 | "License :: OSI Approved :: BSD License", 15 | "Natural Language :: English", 16 | "Programming Language :: Python :: 3.5", 17 | "Programming Language :: Python :: 3.6", 18 | "Programming Language :: Python :: 3.7", 19 | ] 20 | 21 | [tool.poetry.dependencies] 22 | python = "^3.5" 23 | xxhash = "^1" 24 | Pillow = "^6" 25 | mkdocs-redirects = "^1.0.0" 26 | 27 | [tool.poetry.dev-dependencies] 28 | pytest = "^5" 29 | mkdocs = "^1" 30 | mkdocs-material = "^4" 31 | mkdocs-minify-plugin = "^0" 32 | mkdocs-git-revision-date-localized-plugin = "^0" 33 | 34 | [build-system] 35 | requires = ["poetry>=1.0"] 36 | build-backend = "poetry.masonry.api" 37 | -------------------------------------------------------------------------------- /src/iscc/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from iscc.iscc import * 3 | from iscc.const import * 4 | 5 | 6 | __version__ = "1.0.5" 7 | -------------------------------------------------------------------------------- /src/iscc/const.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """Constants""" 3 | 4 | # Component Headers 5 | HEAD_MID = b"\x00" 6 | HEAD_CID_T = b"\x10" 7 | HEAD_CID_T_PCF = b"\x11" 8 | HEAD_CID_I = b"\x12" 9 | HEAD_CID_I_PCF = b"\x13" 10 | HEAD_CID_A = b"\x14" 11 | HEAD_CID_A_PCF = b"\x15" 12 | HEAD_CID_V = b"\x16" 13 | HEAD_CID_V_PCF = b"\x17" 14 | HEAD_CID_M = b"\x18" 15 | HEAD_CID_M_PCF = b"\x19" 16 | HEAD_DID = b"\x20" 17 | HEAD_IID = b"\x30" 18 | 19 | # Algorithm Constants 20 | 21 | # Unicode categories to remove during text normalization 22 | 
UNICODE_FILTER = frozenset( 23 | { 24 | "Cc", 25 | "Cf", 26 | "Cn", 27 | "Co", 28 | "Cs", 29 | "Mc", 30 | "Me", 31 | "Mn", 32 | "Pc", 33 | "Pd", 34 | "Pe", 35 | "Pf", 36 | "Pi", 37 | "Po", 38 | "Ps", 39 | } 40 | ) 41 | 42 | # Common Control Characters considered whitespace 43 | CC_WHITESPACE = ( 44 | "\u0009", # Horizontal Tab (TAB) 45 | "\u000A", # Linefeed (LF) 46 | "\u000D", # Carriage Return (CR) 47 | ) 48 | 49 | SYMBOLS = "C23456789rB1ZEFGTtYiAaVvMmHUPWXKDNbcdefghLjkSnopRqsJuQwxyz" 50 | VALUES = "".join([chr(i) for i in range(58)]) 51 | C2VTABLE = str.maketrans(SYMBOLS, VALUES) 52 | V2CTABLE = str.maketrans(VALUES, SYMBOLS) 53 | INPUT_TRIM = 128 54 | WINDOW_SIZE_MID = 4 55 | WINDOW_SIZE_CID_T = 13 56 | 57 | MAX_INT64 = 2 ** 64 - 1 58 | GEAR1_NORM = 40 59 | GEAR1_MIN = 20 60 | GEAR1_MAX = 640 61 | GEAR1_MASK1 = 0x016118 62 | GEAR1_MASK2 = 0x00A0B1 63 | 64 | GEAR2_NORM = 4096 65 | GEAR2_MIN = 2048 66 | GEAR2_MAX = 65536 67 | GEAR2_MASK1 = 0x0003590703530000 68 | GEAR2_MASK2 = 0x0000D90003530000 69 | 70 | MINHASH_PERMUTATIONS = [ 71 | (853146490016488653, 1089606993368836715), 72 | (1849332765672628665, 726972438868274737), 73 | (1131688930666554379, 66204585613901025), 74 | (1936485333668353377, 1078410179646709132), 75 | (890837126813020267, 1343470117098523467), 76 | (1988249303247129861, 698653121981343911), 77 | (1408894512544874755, 1248486536592473639), 78 | (2140251716176616185, 1447963007834012793), 79 | (1755124413189049421, 1034598851883537815), 80 | (1355916793659431597, 1474008409379745934), 81 | (546586563822844083, 793773480906057541), 82 | (497603761441203021, 980501101461882479), 83 | (2000709902557454173, 963941556313537655), 84 | (1057597903350092207, 233651787311327325), 85 | (1576204252850880253, 243905121737149907), 86 | (2078784234495706739, 570269452476776142), 87 | (1022616668454863635, 297633284648631084), 88 | (2150082342606334489, 1516796967247398557), 89 | (712341150087765807, 1494795672066692649), 90 | (1511757510246096559, 
1728741177365151059), 91 | (1525853819909660573, 1029197538967983408), 92 | (1263771796138990131, 1660732464170610344), 93 | (1215963627200985263, 1399769594446678069), 94 | (590069150281426443, 506465470557005705), 95 | (130824646248385081, 1279720146829545181), 96 | (962725325544728503, 860096419955634036), 97 | (1702561325943522847, 411519685280832908), 98 | (296074222435072629, 69539191273403207), 99 | (490211158716051523, 1960489729088056217), 100 | (1255327197241792767, 605092075716397684), 101 | (699458998727907367, 1017496016211653149), 102 | (32930168991409845, 1304834535101321372), 103 | (1985097843455124585, 949013511180032347), 104 | (362027841570125531, 1142776242221098779), 105 | (1903252144040897835, 576980004709031232), 106 | (900391845076405289, 1071272177143100544), 107 | (547470123601853551, 1494527341093835499), 108 | (1689373724032359119, 1073290814142727850), 109 | (845594231933442371, 1285904200674942617), 110 | (400331968021206285, 1277176606329477335), 111 | (174967108345233429, 343788427301735585), 112 | (876513700861085019, 2100915269685487331), 113 | (505848386844809885, 1227711252031557450), 114 | (1920468508342256199, 18593166391963377), 115 | (1292611725303815789, 2101884148332688233), 116 | (963317239501343903, 191808277534686888), 117 | (1730880032297268007, 2170124912729392024), 118 | (284614929850059717, 918430470748151293), 119 | (1185026248283273081, 1831024560113812361), 120 | (2167288823816985197, 1951365515851067694), 121 | (1214905315086686483, 744352348473654499), 122 | (1555253098157439857, 1921518311887826722), 123 | (1048013650291539723, 2020165648600700886), 124 | (1238618594841147605, 1764930142256726985), 125 | (1213502582686547311, 1903893374912839788), 126 | (286300733803129311, 1449378957774802122), 127 | (1250358511639043529, 1435825328374066345), 128 | (407534797452854371, 833197549717762813), 129 | (960869149538623787, 2238991044337210799), 130 | (1722699901467253087, 748955638857938366), 131 | 
(1325704236119824319, 1834583747494146901), 132 | (196979859428570839, 222012292803592982), 133 | (1669408735473259699, 901238460725547841), 134 | (781336617016068757, 1501611130776083278), 135 | (1098266067998399169, 252607439879480016), 136 | (1029096290873755209, 1830836920644382659), 137 | (812739846428483509, 1454586271201205945), 138 | (2169881403982893233, 701123326708091164), 139 | (1359607697113797935, 410071830926226192), 140 | (1091536557707382549, 1490336338171061046), 141 | (114681553932939819, 2275179250815266246), 142 | (1045116931081485463, 330649240126621873), 143 | (1213599796462591091, 2035365158881816524), 144 | (1442612926077060845, 1034355364158559000), 145 | (1756031352708963265, 1871075133574473361), 146 | (1464880946139292057, 1841358209015281278), 147 | (2250020708259933461, 239376785815016087), 148 | (724863464377112073, 882956145368658445), 149 | (69773460978034507, 1582909132181092862), 150 | (671396618153714907, 2048201890518748211), 151 | (1714581968555756963, 1945737706863748725), 152 | (1092741769522797967, 417364378594666442), 153 | (1429473482081523299, 440501836058396731), 154 | (202375920058644147, 1816096953371555263), 155 | (1448881484563716971, 355849732621157371), 156 | (1511928734060647251, 1539755653401730569), 157 | (66561619491616805, 1718080767548780670), 158 | (1370603980379370069, 1158735260507728929), 159 | (1494541323890039065, 135348932673001715), 160 | (1316312582643208067, 692994612266834808), 161 | (1960792302230354059, 513809688814427061), 162 | (1076562492117846339, 1094892392854250236), 163 | (920942498330268581, 917825197520768721), 164 | (784222790434217639, 42200068259224423), 165 | (2129498860209516943, 1243516270633398404), 166 | (275475632432179293, 741222018005723947), 167 | (2258845039726954909, 501312706344650233), 168 | (806499254784286719, 454954542714357873), 169 | (1937954399052022711, 1963133520760353190), 170 | (222001281118793629, 1726750538874739920), 171 | (1307623898419553715, 
1745678868807221025), 172 | (596378382324432713, 95320895308197238), 173 | (1143053900166573071, 1900589223613132472), 174 | (1533598023462319541, 1988311725300002714), 175 | (23638791382176157, 1395514799898133478), 176 | (2219569576407027459, 464591125115301484), 177 | (71141773826556783, 2278263074080835102), 178 | (1818388640254809153, 2068542555708683121), 179 | (501536663370423415, 2084675879954203152), 180 | (419347422027964479, 480363784445985327), 181 | (529660076339540247, 1691942475524520892), 182 | (155541404377593545, 1521386688261351366), 183 | (693227156398074245, 1895741993383583176), 184 | (1213063041964914711, 1534259041796049575), 185 | (1843840504919232965, 668165748954715122), 186 | (1222766661707938725, 615111447467183312), 187 | (1122794988174825159, 922602432521167981), 188 | (418819557664227619, 1432122605161741889), 189 | (190426400552559033, 471810803924483143), 190 | (1966147745747045233, 1452369029097230462), 191 | (576008166811391959, 1423635693232729979), 192 | (534601397270543087, 526916313177348801), 193 | (90597453448583541, 1346919341313124631), 194 | (963196704215243767, 1611825149194211525), 195 | (1412384602117531587, 1172886288991537433), 196 | (1994868388707757991, 1741793070951343683), 197 | (677251789321463389, 1445881108867176875), 198 | (1440715575333055487, 22909053312763337), 199 | (1345976427394144055, 347195363635084995), 200 | (623223221977032189, 400638654727817116), 201 | (229973393774569323, 1927035341004003106), 202 | (1100924927981486349, 2160611223284354784), 203 | (1142011898554619157, 1326721901958911448), 204 | (930280150002399645, 1755751570149532202), 205 | (256376844288243073, 1211370058617372757), 206 | (667647064886375763, 282395776514445198), 207 | (887523374622233489, 1000876068192587000), 208 | (438738791387778287, 1809427504699261534), 209 | (2303322767409643853, 1098679211823433041), 210 | (774850677511300129, 1241765056385641522), 211 | (1303394264914287545, 1284755431339346903), 212 | 
(30800983110547319, 577367008031816745), 213 | (1274065095153849719, 2025829086559322592), 214 | (2226301266577380223, 681025040779950763), 215 | (2230003637567278053, 906275083377423089), 216 | (1940404327832495849, 2200094770514161145), 217 | (155232665988166157, 2081186345299296727), 218 | (1006460737351558209, 374831829849243822), 219 | (1454611184522745519, 780947004755503428), 220 | (1201238312584161365, 572852516826597254), 221 | (1460216712899454407, 520948816314912678), 222 | (1083430459766309055, 589284061440240932), 223 | (2194066157233035993, 1412046637307019781), 224 | (1555635357808583563, 922505384555302972), 225 | (19844541185119039, 1858010175263466792), 226 | (741253768828065649, 2282103190963219984), 227 | (1081212012956342415, 1040830645870571533), 228 | (692604529379956107, 1614336116059583266), 229 | (2249659420220267707, 2203438353246368047), 230 | (571689073293454359, 101042363635321963), 231 | (1519369915971317387, 987133582396089674), 232 | (1503498466256768565, 34003328920707476), 233 | (624831449073827911, 1406821564018748473), 234 | (523566524560730733, 2149206835176574302), 235 | (400051089267801269, 2027753677426137592), 236 | (1926883841634180865, 1092175325127958507), 237 | (2266664228510473829, 304798152056861662), 238 | (720489687421883995, 1772184317608454989), 239 | (1046160226273956435, 970337005471157286), 240 | (961792785029357631, 578934670696284826), 241 | (1079845340689437947, 2246802187380843162), 242 | (1578017948247993271, 822133722095030739), 243 | (530980257050460361, 1613745449226210684), 244 | (1146615300263939365, 1333208952156098572), 245 | (77805721769554747, 860660131426141556), 246 | (1814123549994096591, 669056521107507681), 247 | (1374741249445251559, 92708650362129618), 248 | (436105780130202935, 1346651453756436045), 249 | (267180305551533365, 1219254489936282127), 250 | (675936151654593631, 1587863486233754297), 251 | (989447341617279643, 1007111213472533853), 252 | (2238684158818820039, 
1732148872681962166), 253 | (1331764214305418433, 1781852975639818553), 254 | (598809232779830627, 33929809895439103), 255 | (1530704471618857191, 2176847430331439517), 256 | (560969485294346585, 2232484045903276723), 257 | (553511112399249535, 1136892702219427415), 258 | (1137355176578183129, 2054202913241361058), 259 | (1389417619759458077, 1357435380345254312), 260 | (1963554513091177761, 2301072449958134130), 261 | (1435961994841646313, 1327054849089190233), 262 | (476883306901073571, 1016640105897699543), 263 | (1075203252082498925, 211174321718731811), 264 | (1623230145688621745, 1300924433422090042), 265 | (1108349714537262649, 2200276897035118696), 266 | (2139930061752271683, 1645105813894706674), 267 | (1011184464903726013, 1181515140629003486), 268 | (1858604191563995829, 967815337348467244), 269 | (1210674227935444255, 217533146659530837), 270 | (1568127022686944529, 2124325251355125800), 271 | (387522773954877337, 205422378336623413), 272 | (1517888610267434301, 1953522170573947113), 273 | (700862400191169363, 1344552490737774094), 274 | (967097615699760379, 372059812979546869), 275 | (2262493808499142063, 14879511499043046), 276 | (1090444553077473263, 494437689008248400), 277 | (1582707150152068285, 1818916274072290591), 278 | (2054916210223584085, 2215414844390388342), 279 | (614072241193993649, 2255736517977871755), 280 | (319471570941800695, 221425464440391675), 281 | (2134889262352799711, 1238037478679691122), 282 | (1908720197982149627, 1611953700583020032), 283 | (1706543409596971629, 1149648113078654923), 284 | (1123377802309478593, 2065480490659500424), 285 | (234248550434195227, 838916658758481218), 286 | (129867854034951915, 2034568773939092299), 287 | (1132173899172731285, 1585274956277924856), 288 | (747397599768216393, 1000249192062601970), 289 | (966118328285669639, 890393120791290392), 290 | (1978744473069861947, 1397830565231711853), 291 | (1367649354516096993, 466350397001674941), 292 | (1888859023181744067, 1162881924911396581), 293 
| (816698952722704181, 234300191628743592), 294 | (2064623720919573, 889304434558041494), 295 | (813333883477635751, 491451245247792106), 296 | (341622436532982245, 1363358953271550641), 297 | (1377838091044673, 2091031550191230350), 298 | (1225049478976914697, 1181152476972725364), 299 | (1015972483717875537, 444687044135710966), 300 | (2078032864408579469, 1634964244097888403), 301 | (1682213591465198327, 1035226665694375798), 302 | (412305785740128109, 1012868617861784491), 303 | (166693608092683829, 623582405922007707), 304 | (142001737827086217, 1373681019083513008), 305 | (1749349945778548429, 1412245043370335664), 306 | (249487271773300899, 748034660340058220), 307 | (2284010474415810475, 89587669385366596), 308 | (2116614295071563543, 896538142042373209), 309 | (982081696050850971, 238384078753871766), 310 | (1407517150232293031, 1227918329315752077), 311 | (1781334002448441555, 1133783862242472534), 312 | (555204413597398613, 156835912556655276), 313 | (2130854063125005169, 1620084504055877943), 314 | (1533769449207033351, 531348171498495190), 315 | (343511759321043983, 19020177087633722), 316 | (1096846909243812591, 1367659725005456180), 317 | (219853968883601293, 1246568561362943234), 318 | (1623242076811302017, 1270405659154214890), 319 | (1670457544761436411, 63913058899124031), 320 | (1510538207643510749, 316314260758899106), 321 | (755178036278346005, 1607298207013906952), 322 | (1758593919863430495, 910218677289284479), 323 | (1798669847980462979, 1377363600250827915), 324 | (1184950555241875363, 1034879193665968156), 325 | (949293229786807261, 1390525603722466192), 326 | (577374388210505249, 1452060099744603134), 327 | ] 328 | 329 | CHUNKING_GEAR = [ 330 | 9584138480181866666, 331 | 4739450037122062430, 332 | 1042006760432515769, 333 | 10675154520554330663, 334 | 15869016765101259526, 335 | 8970928072383595559, 336 | 1399451202205921674, 337 | 14523822808097149755, 338 | 16268498464839721299, 339 | 10481172452375523505, 340 | 17104617054662428007, 
341 | 1589812074021361642, 342 | 5529368114994898429, 343 | 16097147859444922117, 344 | 7366391750793198740, 345 | 11100538009918328137, 346 | 1389689728615383157, 347 | 4977138822009172500, 348 | 908349889557194910, 349 | 14452518814433479233, 350 | 2122926032271239532, 351 | 591612022955043504, 352 | 9379034436570273189, 353 | 12748258297147873806, 354 | 4307386326245858243, 355 | 13845229916084989633, 356 | 11224472648935237303, 357 | 7047696390035316099, 358 | 2021133566789993437, 359 | 17387162748083618158, 360 | 11746787256992261957, 361 | 6644482612611712714, 362 | 15729398955930993486, 363 | 18187694890389888249, 364 | 13375007170405426180, 365 | 4646676434852504131, 366 | 13152698236329639071, 367 | 899989819383117385, 368 | 1604228284900755822, 369 | 13429168974601667864, 370 | 3706248770764044735, 371 | 3719799868214789934, 372 | 339511817415309475, 373 | 12306710798301877171, 374 | 9844020938499650522, 375 | 13507342816267977422, 376 | 15331217600725578556, 377 | 7506003564454403634, 378 | 17943236144189306428, 379 | 282153689319390566, 380 | 7654271695669749695, 381 | 2650412143911437370, 382 | 6193440044944269691, 383 | 9296646612477743744, 384 | 15077579129862372948, 385 | 67630558006200567, 386 | 11937031764123301943, 387 | 1634327986517329169, 388 | 16073934395340319514, 389 | 11660580892053471307, 390 | 12301495579660351243, 391 | 16908718276972184511, 392 | 6851717516129410187, 393 | 13288278789994352315, 394 | 17482170774163197685, 395 | 12177168157992128323, 396 | 1679876621412537528, 397 | 15666827561093998679, 398 | 4235032027386979601, 399 | 17396011814487376094, 400 | 2036017399572567727, 401 | 4977152437582070133, 402 | 11341111713611820820, 403 | 5866443846249079891, 404 | 5131277185090952872, 405 | 8325299058005558320, 406 | 5701450024662049407, 407 | 15870252139465586153, 408 | 641910037851244477, 409 | 5172232175829573378, 410 | 2261684586607900474, 411 | 11396825283718526131, 412 | 12408680075109652465, 413 | 7761877592432080901, 414 
| 13820035802684848169, 415 | 8150091535052795450, 416 | 1103357817677537274, 417 | 13470426615970288837, 418 | 4696524065622673976, 419 | 9336804607285957500, 420 | 13043178028673218162, 421 | 7139020806469476608, 422 | 12450708403507569100, 423 | 2877039905016676547, 424 | 15118872351294838361, 425 | 3277072151995360446, 426 | 1979210712452295885, 427 | 14822651643543876641, 428 | 5849754172112174627, 429 | 13664543478254756807, 430 | 16186972696580520130, 431 | 14259131679517995788, 432 | 1772106294408535188, 433 | 2668205339646827112, 434 | 3734021086026184498, 435 | 4257506854909152229, 436 | 6797729639474582495, 437 | 3708095106171770747, 438 | 15445894064208319783, 439 | 11045733249000282278, 440 | 6925260395759991481, 441 | 6761677416581440942, 442 | 3134957115005596133, 443 | 5496794829211694837, 444 | 225035875953155227, 445 | 18051382753002575119, 446 | 6911658830635795092, 447 | 6648838042848840266, 448 | 7680838377178993211, 449 | 14373546918520540763, 450 | 7385952462173201391, 451 | 7500965322394952100, 452 | 15539214383494689771, 453 | 14355530880918970074, 454 | 4040759991734970063, 455 | 1335151750647325670, 456 | 13713452291232361388, 457 | 8852782707920062625, 458 | 6076783566257059794, 459 | 14451547968886132839, 460 | 6756882940270420653, 461 | 17423128808598833972, 462 | 5877907771709558759, 463 | 14308413074787508328, 464 | 12294727846616188882, 465 | 13766545313722789196, 466 | 7000331838802888702, 467 | 15110028412924060381, 468 | 15869145452552081798, 469 | 10836437530623796047, 470 | 1273143868608979117, 471 | 17728019699248776702, 472 | 379008101491021165, 473 | 6658832383485441856, 474 | 6005905363267598720, 475 | 4792802520786808134, 476 | 17024928019214694263, 477 | 7949301678895773307, 478 | 14602122883430422290, 479 | 6416689239839102410, 480 | 18112987618441438141, 481 | 5424513836620859057, 482 | 12327961344656070412, 483 | 18229731317766561349, 484 | 6214341855555485197, 485 | 14659604854593022088, 486 | 18341976098904231516, 
487 | 9093141550798891276, 488 | 4487469223051523007, 489 | 12576621890114680116, 490 | 11368566035561888278, 491 | 16632902625329423294, 492 | 13764076000271015053, 493 | 11494903226088746337, 494 | 14079100963083335535, 495 | 5976601008655555884, 496 | 5685807667042201553, 497 | 16503266544486236927, 498 | 5505089898459277917, 499 | 17076606531971661551, 500 | 939769563919939433, 501 | 17217248958964594832, 502 | 11196454443995107214, 503 | 13253314556391295544, 504 | 17340262486782904124, 505 | 5483165811177129540, 506 | 121736889831618943, 507 | 6318157315988658220, 508 | 14520375112718267902, 509 | 689388276875596813, 510 | 5273319774965020902, 511 | 7975410517565653865, 512 | 13935269057627157047, 513 | 16821796908479891795, 514 | 5882048506860913277, 515 | 18003709489856105216, 516 | 1424933842252756366, 517 | 6634557257081066175, 518 | 16179356916240399588, 519 | 11153419399622634817, 520 | 15654294493035402949, 521 | 2652919763627807814, 522 | 16437183290373292867, 523 | 16903315446495122175, 524 | 3575318971059548300, 525 | 3073697257555445515, 526 | 16187136733800880291, 527 | 15191964085364171996, 528 | 11982016174040399757, 529 | 1948589207658719032, 530 | 14444449012119241408, 531 | 7130754012353479650, 532 | 7480280819583944745, 533 | 3603028513293740433, 534 | 7021162527209392860, 535 | 2124450348946366496, 536 | 14349140477237426219, 537 | 7396225914272122063, 538 | 16288120608246645021, 539 | 7309794834881975478, 540 | 16746864570463829614, 541 | 9239996606832866982, 542 | 14126189643057989505, 543 | 5785181374404079776, 544 | 16681042508550037223, 545 | 9085478584447523753, 546 | 12879577862603639783, 547 | 13351556131001260565, 548 | 10860701565908202403, 549 | 9109516948909639475, 550 | 2942389181877553466, 551 | 1907923359833671766, 552 | 1700327967934711796, 553 | 4355952370607563279, 554 | 6159416062364401684, 555 | 8120694842642123744, 556 | 4670360822544180192, 557 | 12684384265447906291, 558 | 11518186189217338692, 559 | 
14839496566538901930, 560 | 13515715604989800698, 561 | 12135065096961528408, 562 | 9056982071865174221, 563 | 12690699907549395246, 564 | 2080896935929507230, 565 | 14546126411900211421, 566 | 6222235617711806766, 567 | 13387691023848518640, 568 | 1259523422199249803, 569 | 1733690531272524911, 570 | 16691543548458831721, 571 | 3252085970219428027, 572 | 790320086519395195, 573 | 8366099548552136926, 574 | 357423734596052102, 575 | 6375583027298966643, 576 | 88639135753272123, 577 | 13813972796887520980, 578 | 8203570281250814300, 579 | 18377325011640278855, 580 | 2922465295015278442, 581 | 2164203008979443347, 582 | 7447171935848155518, 583 | 3663261456454345351, 584 | 5865411828910435346, 585 | 13570376904595974307, 586 | ] 587 | -------------------------------------------------------------------------------- /src/iscc/iscc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ISCC Reference Implementation""" 3 | from binascii import hexlify 4 | from statistics import median 5 | import math 6 | from io import BytesIO 7 | from hashlib import sha256 8 | import unicodedata 9 | from PIL import Image 10 | import xxhash 11 | from iscc.const import * 12 | 13 | 14 | ############################################################################### 15 | # Top-Level functions for generating ISCC Component Codes # 16 | ############################################################################### 17 | 18 | 19 | def meta_id(title, extra=""): 20 | 21 | # 1. Normalization 22 | title_norm = text_normalize(title, keep_ws=True) 23 | extra_norm = text_normalize(extra, keep_ws=True) 24 | 25 | # 2. Trimming 26 | title_trimmed = text_trim(title_norm) 27 | extra_trimmed = text_trim(extra_norm) 28 | 29 | # 3. Concatenate 30 | concat = "\u0020".join((title_trimmed, extra_trimmed)).strip() 31 | 32 | # 4. Create a list of n-grams 33 | n_grams = sliding_window(concat, width=WINDOW_SIZE_MID) 34 | 35 | # 5. 
def content_id_text(text, partial=False):
    """Generate a Content-ID-Text component code from plain text.

    The text is normalized with all whitespace removed and cut into
    overlapping character windows of ``WINDOW_SIZE_CID_T``. The characters of
    each window are joined with a single space, UTF-8 encoded and hashed to a
    32-bit feature with xxHash32. The least significant bit of each of the 64
    minimum-hash values over those features forms the 64-bit body.

    :param text: text to fingerprint; passed through ``text_normalize``.
    :param partial: when true the ``HEAD_CID_T_PCF`` header is used instead
        of ``HEAD_CID_T``.
    :return: the encoded component code (header byte + 8 body bytes).
    """
    # Normalize with whitespace dropped.
    normalized = text_normalize(text, keep_ws=False)

    # One xxHash32 feature per character window.
    features = []
    for window in sliding_window(normalized, WINDOW_SIZE_CID_T):
        spaced = "\u0020".join(window)
        features.append(xxhash.xxh32(spaced.encode("utf-8")).intdigest())

    # Keep only the lowest bit of every min-hash signature.
    signatures = minimum_hash(features, n=64)
    bits = "".join(str(signature & 1) for signature in signatures)

    # Pack the 64 bits big-endian into 8 bytes.
    body = int(bits, 2).to_bytes(8, "big", signed=False)

    # Select the header and encode.
    header = HEAD_CID_T_PCF if partial else HEAD_CID_T
    return encode(header + body)
def content_id_mixed(cids, partial=False):
    """Combine several Content-ID codes into one Content-ID-Mixed code.

    Every code is decoded and cut to its first 8 bytes; the similarity hash
    over those 8-byte values becomes the body of the mixed code.

    :param cids: encoded Content-ID codes to combine.
    :param partial: when true the ``HEAD_CID_M_PCF`` header is used instead
        of ``HEAD_CID_M``.
    :return: the encoded Content-ID-Mixed component code.
    """
    # Decode each code and keep the leading 8 bytes of the raw digest.
    prefixes = []
    for code in cids:
        prefixes.append(decode(code)[:8])

    # Fold all prefixes into a single similarity-preserving digest.
    body = similarity_hash(prefixes)

    # Select the header and encode.
    header = HEAD_CID_M_PCF if partial else HEAD_CID_M
    return encode(header + body)
def instance_id(data):
    """Create the Instance-ID of a file, a binary stream or raw bytes.

    The input is read in 64000-byte chunks. Each chunk is prefixed with
    ``0x00`` and double-SHA256 hashed to form a leaf; the leaves are reduced
    to a single top hash with ``top_hash``. The code is the ``HEAD_IID``
    header followed by the first 8 bytes of that top hash.

    :param data: a ``str`` file path (opened in binary mode and closed again
        here), an object with a ``read`` method (used as is and left open),
        or anything else accepted by ``BytesIO`` (raw bytes).
    :return: ``[code, hex_hash]`` - the encoded Instance-ID and the full top
        hash as a hexadecimal string.
    :raises ValueError: if the input contains no data; previously this case
        recursed without end inside ``top_hash``.
    """
    opened_here = isinstance(data, str)
    if opened_here:
        stream = open(data, "rb")
    elif hasattr(data, "read"):
        stream = data
    else:
        stream = BytesIO(data)

    leaf_node_digests = []
    try:
        while True:
            chunk = stream.read(64000)
            if not chunk:
                break
            # 0x00 marks a leaf node (inner nodes are prefixed with 0x01).
            leaf_node_digests.append(sha256d(b"\x00" + chunk))
    finally:
        # Only close handles this function opened; caller streams stay open.
        if opened_here:
            stream.close()

    if not leaf_node_digests:
        raise ValueError("Cannot create an Instance-ID from empty data")

    top_hash_digest = top_hash(leaf_node_digests)
    instance_id_digest = HEAD_IID + top_hash_digest[:8]

    code = encode(instance_id_digest)
    hex_hash = hexlify(top_hash_digest).decode("ascii")

    return [code, hex_hash]
def image_normalize(img):
    """Normalize an image to a 32x32 grid of greyscale pixel values.

    :param img: a ``PIL.Image.Image`` instance, or anything ``Image.open``
        accepts.
    :return: a list of 32 rows, each a list of 32 pixel values taken from the
        greyscale ("L" mode) image after bicubic resizing.
    """
    if not isinstance(img, Image.Image):
        img = Image.open(img)

    # 1. Convert to greyscale
    img = img.convert("L")

    # 2. Resize to 32x32
    img = img.resize((32, 32), Image.BICUBIC)

    # 3. Create two dimensional array.
    # The flat pixel sequence is materialized once; the previous version
    # rebuilt the full list for every one of the 1024 pixels.
    flat = list(img.getdata())
    pixels = [flat[row * 32 : (row + 1) * 32] for row in range(32)]

    return pixels
def top_hash(hashes):
    """Reduce a list of leaf digests to the single top hash of a hash tree.

    Neighbouring digests are combined pairwise with ``hash_inner_nodes``. If
    a level has an odd number of digests the last one is paired with itself.
    This repeats until one digest is left.

    :param hashes: leaf digests (bytes), in order.
    :return: the top hash; for a single leaf this is the leaf itself.
    :raises ValueError: if ``hashes`` is empty. The previous recursive
        version never terminated for an empty list.
    """
    level = list(hashes)
    if not level:
        raise ValueError("top_hash requires at least one digest")

    while len(level) > 1:
        paired = [
            hash_inner_nodes(level[i], level[i + 1])
            for i in range(0, len(level) - 1, 2)
        ]
        if len(level) % 2 == 1:
            # Odd count: the trailing digest is hashed together with itself.
            paired.append(hash_inner_nodes(level[-1], level[-1]))
        level = paired

    return level[0]


def sha256d(data):
    """Return the SHA-256 digest of the SHA-256 digest of ``data``."""
    return sha256(sha256(data).digest()).digest()


def hash_inner_nodes(a, b):
    """Hash two child digests into their parent digest.

    The children are concatenated behind a ``0x01`` prefix byte and hashed
    with ``sha256d``.
    """
    return sha256d(b"\x01" + a + b)
def sliding_window(seq, width):
    """Yield overlapping slices of ``seq`` that are ``width`` items long.

    Consecutive slices start one position apart. When ``seq`` is shorter than
    ``width`` a single slice holding the whole sequence is produced.

    :param seq: any sliceable sequence with a length.
    :param width: window size, must be at least 2.
    :return: a generator over the slices.
    """
    assert width >= 2, "Sliding window width must be 2 or bigger."
    window_count = len(seq) - width + 1
    if window_count < 1:
        # Always emit at least one (possibly short) window.
        window_count = 1
    return (seq[start : start + width] for start in range(window_count))
def distance(a, b):
    """Return the Hamming distance (number of differing bits) of two values.

    :param a: an integer, a bytes digest, or an encoded component code.
    :param b: second value, given in the same form as ``a``.
    :return: count of bit positions in which the two values differ.

    When both arguments are strings they are decoded and the first byte of
    each decoded value is dropped before comparing. Bytes are interpreted as
    big-endian unsigned integers.
    """
    both_are_codes = isinstance(a, str) and isinstance(b, str)
    if both_are_codes:
        a, b = decode(a)[1:], decode(b)[1:]

    both_are_bytes = isinstance(a, bytes) and isinstance(b, bytes)
    if both_are_bytes:
        a = int.from_bytes(a, "big", signed=False)
        b = int.from_bytes(b, "big", signed=False)

    differing_bits = a ^ b
    return bin(differing_bits).count("1")
# Type stubs for the functions implemented in iscc.py.

# Input that may be a file path, an open binary stream or raw bytes.
B = TypeVar("B", str, BinaryIO, bytes)
# Input accepted by the image functions. The TypeVar name must match the
# variable it is assigned to (it was previously declared as "I").
IMG = TypeVar("IMG", str, BytesIO, Image.Image)
TEXT = TypeVar("TEXT", str, bytes)

# Top Level Functions
# meta_id and instance_id return list objects in iscc.py, hence List[str].
def meta_id(
    title: Union[str, bytes], extra: Union[str, bytes] = ""
) -> List[str]: ...
def content_id_text(text: Union[str, bytes], partial: bool = False) -> str: ...
def content_id_image(img: IMG, partial: bool = False) -> str: ...
def content_id_mixed(cids: List[str], partial: bool = False) -> str: ...
def data_id(data: B) -> str: ...
def instance_id(data: B) -> List[str]: ...

# Content Normalization
# NOTE(review): no `text_pre_normalize` definition is visible in iscc.py -
# confirm whether this stub is stale.
def text_pre_normalize(text: TEXT) -> str: ...
def text_trim(text: str) -> str: ...
def text_normalize(text: Union[str, bytes], keep_ws: bool = False) -> str: ...
def image_normalize(img: IMG) -> List[List[int]]: ...

# Feature Hashing
def similarity_hash(hash_digests: Sequence[ByteString]) -> bytes: ...
def minimum_hash(features: Iterable[int], n: int = 64) -> List[int]: ...
def image_hash(pixels: List[List[int]]) -> bytes: ...

# Content-ID-Image utils
# Parameter name matches the implementation (`values_list`).
def dct(values_list: Sequence[float]) -> Sequence[float]: ...

# Data-ID utils
def data_chunks(data: B) -> Generator[bytes, None, None]: ...
def chunk_length(
    data: bytes, norm_size: int, min_size: int, max_size: int, mask_1: int, mask_2: int
) -> int: ...

# Instance-ID helpers
def sha256d(data: bytes) -> bytes: ...
def top_hash(hashes: List[bytes]) -> bytes: ...
def hash_inner_nodes(a: bytes, b: bytes) -> bytes: ...

# Common utility functions
# sliding_window returns a generator expression, not a list.
def sliding_window(seq: Sequence, width: int) -> Iterator: ...
def distance(a: Union[int, str, bytes], b: Union[int, str, bytes]) -> int: ...
def encode(digest: bytes) -> str: ...
def decode(code: str) -> bytes: ...
-------------------------------------------------------------------------------- /tests/file_image_cat.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/tests/file_image_cat.gif -------------------------------------------------------------------------------- /tests/file_image_cat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/tests/file_image_cat.jpg -------------------------------------------------------------------------------- /tests/file_image_cat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/tests/file_image_cat.png -------------------------------------------------------------------------------- /tests/file_image_lenna.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/tests/file_image_lenna.jpg -------------------------------------------------------------------------------- /tests/file_image_pixel_png_black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/tests/file_image_pixel_png_black.png -------------------------------------------------------------------------------- /tests/file_image_pixel_png_transp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/tests/file_image_pixel_png_transp.png -------------------------------------------------------------------------------- 
/tests/file_image_pixel_png_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iscc/iscc-specs/7c5302b5bd71a029e1174f99f85a81e42b220061/tests/file_image_pixel_png_white.png -------------------------------------------------------------------------------- /tests/test_inputs.json: -------------------------------------------------------------------------------- 1 | { 2 | "meta_id": { 3 | "test_001_empty": { 4 | "inputs": [ 5 | "", 6 | "" 7 | ] 8 | }, 9 | "test_002_basic": { 10 | "inputs": [ 11 | "Die Unendliche Geschichte", 12 | "" 13 | ] 14 | }, 15 | "test_003_punctuation": { 16 | "inputs": [ 17 | "Die Unendliche, Geschichte", 18 | "" 19 | ] 20 | }, 21 | "test_004_white_space": { 22 | "inputs": [ 23 | " Die Unendliche, Geschichte ", 24 | "" 25 | ] 26 | }, 27 | "test_005_non_ascii": { 28 | "inputs": [ 29 | "Die unéndlíche, Geschichte", 30 | "" 31 | ] 32 | }, 33 | "test_006_i18n": { 34 | "inputs": [ 35 | "Iñtërnâtiônàlizætiøn☃", 36 | "" 37 | ] 38 | }, 39 | "test_007_extra": { 40 | "inputs": [ 41 | "Die unendliche Geschichte", 42 | "The Movie" 43 | ] 44 | }, 45 | "test_008_long": { 46 | "inputs": [ 47 | "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed", 48 | "" 49 | ] 50 | }, 51 | "test_009_long_i18n": { 52 | "inputs": [ 53 | "Iñtërnâtiônàlizætiøn☃ Iñtërnâtiônàlizætiøn☃ Iñtërnâtiônàlizætiøn☃ Iñtërnâtiônàlizætiøn☃ Iñtërnâtiônàlizætiøn☃ Iñtërnâtiônàlizætiøn☃", 54 | "" 55 | ] 56 | } 57 | }, 58 | "content_id_text": { 59 | "test_001_empty": { 60 | "inputs": [ 61 | "", 62 | false 63 | ] 64 | }, 65 | "test_002_short": { 66 | "inputs": [ 67 | "Some short text", 68 | false 69 | ] 70 | }, 71 | "test_003_long": { 72 | "inputs": [ 73 | "Their most significant and usefull property of similarity-preserving fingerprints gets lost in the fragmentation of individual, propietary and use case specific implementations. 
The real benefit lies in similarity preservation beyond your local data archive on a global scale accross vendors.", 74 | false 75 | ] 76 | }, 77 | "test_004_partial": { 78 | "inputs": [ 79 | "Some text for partial content id text", 80 | true 81 | ] 82 | } 83 | }, 84 | "content_id_image": { 85 | "test_001_file_image_pixel_png_transp_png": { 86 | "inputs": [ 87 | "file_image_pixel_png_transp.png", 88 | false 89 | ] 90 | }, 91 | "test_002_file_image_pixel_png_white_png": { 92 | "inputs": [ 93 | "file_image_pixel_png_white.png", 94 | false 95 | ] 96 | }, 97 | "test_003_file_image_pixel_png_black_png": { 98 | "inputs": [ 99 | "file_image_pixel_png_black.png", 100 | false 101 | ] 102 | }, 103 | "test_004_file_image_cat_jpg": { 104 | "inputs": [ 105 | "file_image_cat.jpg", 106 | false 107 | ] 108 | }, 109 | "test_005_file_image_cat_png": { 110 | "inputs": [ 111 | "file_image_cat.png", 112 | false 113 | ] 114 | }, 115 | "test_006_file_image_cat_gif": { 116 | "inputs": [ 117 | "file_image_cat.gif", 118 | false 119 | ] 120 | } 121 | }, 122 | "content_id_mixed": { 123 | "test_001_cid_t_one": { 124 | "inputs": [ 125 | [ 126 | "CTZYQRgV5eCQp" 127 | ], 128 | false 129 | ] 130 | }, 131 | "test_002_cid_t_two_partial": { 132 | "inputs": [ 133 | [ 134 | "CTZYQRgV5eCQp", 135 | "CTDVyoiMYHq6F" 136 | ], 137 | true 138 | ] 139 | }, 140 | "test_003_cid_t_and_i": { 141 | "inputs": [ 142 | [ 143 | "CTZYQRgV5eCQp", 144 | "CYDfTq7Qc7Fre" 145 | ], 146 | false 147 | ] 148 | } 149 | }, 150 | "data_chunks": { 151 | "test_001_file_image_cat_jpg": { 152 | "inputs": [ 153 | "file_image_cat.jpg" 154 | ] 155 | }, 156 | "test_002_file_image_cat_png": { 157 | "inputs": [ 158 | "file_image_cat.png" 159 | ] 160 | }, 161 | "test_003_file_image_cat_gif": { 162 | "inputs": [ 163 | "file_image_cat.gif" 164 | ] 165 | } 166 | }, 167 | "data_id": { 168 | "test_001_file_image_cat_jpg": { 169 | "inputs": [ 170 | "file_image_cat.jpg" 171 | ] 172 | }, 173 | "test_002_file_image_cat_png": { 174 | "inputs": [ 175 | 
"file_image_cat.png" 176 | ] 177 | }, 178 | "test_003_file_image_cat_gif": { 179 | "inputs": [ 180 | "file_image_cat.gif" 181 | ] 182 | } 183 | }, 184 | "image_normalize": { 185 | "test_001_file_image_cat_jpg": { 186 | "inputs": [ 187 | "file_image_cat.jpg" 188 | ] 189 | }, 190 | "test_002_file_image_cat_png": { 191 | "inputs": [ 192 | "file_image_cat.png" 193 | ] 194 | }, 195 | "test_003_file_image_cat_gif": { 196 | "inputs": [ 197 | "file_image_cat.gif" 198 | ] 199 | } 200 | }, 201 | "instance_id": { 202 | "test_001_file_image_cat_jpg": { 203 | "inputs": [ 204 | "file_image_cat.jpg" 205 | ] 206 | }, 207 | "test_002_file_image_cat_png": { 208 | "inputs": [ 209 | "file_image_cat.png" 210 | ] 211 | }, 212 | "test_003_file_image_cat_gif": { 213 | "inputs": [ 214 | "file_image_cat.gif" 215 | ] 216 | } 217 | }, 218 | "minimum_hash": { 219 | "test_001_simple": { 220 | "inputs": [ 221 | [ 222 | 2307709831, 223 | 4057803343, 224 | 1189896175, 225 | 998490104, 226 | 1957593182, 227 | 985638384, 228 | 1499267049, 229 | 3716940741, 230 | 3418313233, 231 | 2481613561 232 | ] 233 | ] 234 | } 235 | }, 236 | "text_normalize": { 237 | "test_001_decompose": { 238 | "inputs": [ 239 | "Ç 가 Ω ℍ ① ︷ i⁹ ¼ dž ⫝̸ ȴ ȷ ɂ ć", 240 | false 241 | ] 242 | }, 243 | "test_002_tricky": { 244 | "inputs": [ 245 | " Iñtërnâtiôn\nàlizætiøn☃💩 – is a tric\t ky \u00A0 thing!\r", 246 | false 247 | ] 248 | }, 249 | "test_002_tricky_ws": { 250 | "inputs": [ 251 | " Iñtërnâtiôn\nàlizætiøn☃💩 – is a tric\t ky \u00A0 thing!\r", 252 | true 253 | ] 254 | } 255 | }, 256 | "text_trim": { 257 | "test_001_empty": { 258 | "inputs": [ 259 | "" 260 | ] 261 | }, 262 | "test_002_long": { 263 | "inputs": [ 264 | "Ç가Ωℍ①︷i⁹¼dž⫝̸ȴȷɂćÇ가Ωℍ①︷i⁹¼dž⫝̸ȴȷɂć küßî ЌύБЇ ¡¢£¤ ƀƁƂƃƄƅƆƇ ɐɑɒɓɔ ʰʱʲʳʴʵʶʷʸ" 265 | ] 266 | }, 267 | "test_002_nbsp": { 268 | "inputs": [ 269 | " " 270 | ] 271 | }, 272 | "test_003_full": { 273 | "inputs": [ 274 | "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut 
labore et dolore magna aliquy" 275 | ] 276 | }, 277 | "test_004_empty": { 278 | "inputs": [ 279 | " leading and trailing spaces " 280 | ] 281 | } 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /tests/test_iscc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import json 4 | import random 5 | from io import BytesIO 6 | import pytest 7 | from PIL import Image, ImageFilter, ImageEnhance 8 | import iscc 9 | 10 | 11 | TESTS_PATH = os.path.dirname(os.path.realpath(__file__)) 12 | os.chdir(TESTS_PATH) 13 | 14 | 15 | TEXT_A = u""" 16 | Their most significant and usefull property of similarity-preserving 17 | fingerprints gets lost in the fragmentation of individual, propietary and 18 | use case specific implementations. The real benefit lies in similarity 19 | preservation beyond your local data archive on a global scale accross 20 | vendors. 21 | """ 22 | 23 | TEXT_B = u""" 24 | The most significant and usefull property of similarity-preserving 25 | fingerprints gets lost in the fragmentation of individual, propietary and 26 | use case specific implementations. The real benefit lies in similarity 27 | preservation beyond your local data archive on a global scale accross 28 | vendors. 29 | """ 30 | 31 | TEXT_C = u""" 32 | A need for open standard fingerprinting. We don´t need the best 33 | Fingerprinting algorithm just an accessible and widely used one. 
def test_test_data():
    """Run every conformance vector in ``test_data.json`` against ``iscc``.

    The JSON file maps a function name to a mapping of named cases. Each case
    whose name starts with ``test_`` provides positional ``inputs`` and the
    expected ``outputs``; the function of that name is looked up on the
    ``iscc`` module and called with the inputs.
    """
    # Load the vectors first so the file handle is closed before any of the
    # functions under test run.
    with open("test_data.json", encoding="utf-8") as jfile:
        data = json.load(jfile)
    assert isinstance(data, dict)

    for funcname, tests in data.items():
        for testname, testdata in tests.items():
            if not testname.startswith("test_"):
                continue
            func = getattr(iscc, funcname)
            args = testdata["inputs"]
            expected = testdata["outputs"]
            if funcname == "data_chunks":
                # Expected chunks are strings whose hex payload follows the
                # first colon; decode them into a local list (leaving the
                # loaded data untouched) and materialize the result for
                # comparison.
                expected = [bytes.fromhex(item.split(":")[1]) for item in expected]
                result = list(func(*args))
            else:
                result = func(*args)

            assert result == expected, "%s %s " % (funcname, args)
def test_trim_text():
    """Check character and UTF-8 byte lengths of ``text_trim`` results.

    Each case is ``(input text, expected characters, expected UTF-8 bytes)``
    and covers 2-byte, 3-byte and mixed-width input.
    """
    cases = (
        ("ü" * 128, 64, 128),
        ("驩" * 128, 42, 126),
        ("Iñtërnâtiônàlizætiøn☃💩" * 6, 85, 128),
    )
    for text, char_count, byte_count in cases:
        result = iscc.text_trim(text)
        assert char_count == len(result)
        assert byte_count == len(result.encode("utf-8"))
def test_hamming_distance():
    """Check ``distance`` on raw integers and on Meta-IDs of edited titles."""
    assert iscc.distance(0b0001111, 0b1000111) == 2

    extra = "von Michael Ende"
    reference = iscc.meta_id("Die Unendliche Geschichte", extra)[0]

    # (edited title, largest distance to the reference that is accepted)
    near_variants = (
        ("Die UnXndliche Geschichte", 10),  # change one character
        ("Die nendliche Geschichte", 14),  # delete one character
        ("Die UnendlicheX Geschichte", 13),  # add one character
        ("Diex Unandlische Geschiche", 22),  # add, change, delete
        ("Unendliche Geschichte, Die", 13),  # change word order
    )
    for title, limit in near_variants:
        variant = iscc.meta_id(title, extra)[0]
        assert iscc.distance(reference, variant) <= limit

    # Totally different title, generated without the extra text
    unrelated = iscc.meta_id("Now for something different")[0]
    assert iscc.distance(reference, unrelated) >= 24
def test_data_id():
    """Check ``data_id`` on seeded pseudo-random data before and after edits.

    The expected values depend on the exact sequence of draws from the seeded
    RNG, so the order of the ``random`` calls below must not change.
    """
    random.seed(1)
    data = bytearray([random.getrandbits(8) for _ in range(1000000)])  # 1,000,000 random bytes
    did_a = iscc.data_id(data)
    assert did_a == "CDK2KdVAz5XTs"
    # Insert three bytes in the middle of the buffer.
    data.insert(500000, 1)
    data.insert(500001, 2)
    data.insert(500002, 3)
    did_b = iscc.data_id(data)
    # NOTE(review): this compares did_b with itself and can never fail.
    # Presumably a comparison against did_a was intended -- confirm the
    # expected relation between did_a and did_b before changing it.
    assert did_b == did_b
    for x in range(100):  # insert 100 bytes of random noise at random offsets
        data.insert(random.randint(0, 1000000), random.randint(0, 255))
    did_c = iscc.data_id(data)
    assert iscc.distance(did_a, did_c) == 17
def test_pi():
    """Check that ``math.pi`` has the expected value on this system."""
    from math import pi

    expected = 3.141592653589793
    assert pi == expected
167, 385 | 182, 386 | 168, 387 | 103, 388 | 47, 389 | 88, 390 | 72, 391 | 44, 392 | 62, 393 | 53, 394 | 17, 395 | 19, 396 | 22, 397 | ], 398 | [ 399 | 13, 400 | 18, 401 | 11, 402 | 7, 403 | 7, 404 | 112, 405 | 201, 406 | 173, 407 | 101, 408 | 93, 409 | 124, 410 | 128, 411 | 94, 412 | 70, 413 | 75, 414 | 76, 415 | 115, 416 | 134, 417 | 154, 418 | 176, 419 | 205, 420 | 178, 421 | 85, 422 | 34, 423 | 70, 424 | 71, 425 | 46, 426 | 43, 427 | 49, 428 | 19, 429 | 17, 430 | 19, 431 | ], 432 | [ 433 | 13, 434 | 17, 435 | 12, 436 | 6, 437 | 7, 438 | 107, 439 | 188, 440 | 214, 441 | 184, 442 | 98, 443 | 91, 444 | 101, 445 | 86, 446 | 84, 447 | 79, 448 | 83, 449 | 108, 450 | 121, 451 | 138, 452 | 177, 453 | 213, 454 | 188, 455 | 53, 456 | 31, 457 | 36, 458 | 50, 459 | 49, 460 | 36, 461 | 40, 462 | 20, 463 | 16, 464 | 19, 465 | ], 466 | [ 467 | 16, 468 | 19, 469 | 12, 470 | 6, 471 | 8, 472 | 88, 473 | 185, 474 | 213, 475 | 206, 476 | 173, 477 | 79, 478 | 82, 479 | 93, 480 | 89, 481 | 73, 482 | 95, 483 | 112, 484 | 96, 485 | 80, 486 | 126, 487 | 181, 488 | 175, 489 | 46, 490 | 27, 491 | 35, 492 | 26, 493 | 36, 494 | 43, 495 | 43, 496 | 22, 497 | 17, 498 | 21, 499 | ], 500 | [ 501 | 19, 502 | 21, 503 | 13, 504 | 6, 505 | 7, 506 | 69, 507 | 180, 508 | 223, 509 | 208, 510 | 190, 511 | 148, 512 | 116, 513 | 120, 514 | 98, 515 | 71, 516 | 85, 517 | 122, 518 | 98, 519 | 106, 520 | 122, 521 | 118, 522 | 126, 523 | 63, 524 | 22, 525 | 37, 526 | 32, 527 | 29, 528 | 46, 529 | 49, 530 | 24, 531 | 18, 532 | 21, 533 | ], 534 | [ 535 | 19, 536 | 21, 537 | 17, 538 | 8, 539 | 7, 540 | 62, 541 | 144, 542 | 221, 543 | 223, 544 | 207, 545 | 177, 546 | 129, 547 | 131, 548 | 89, 549 | 98, 550 | 74, 551 | 99, 552 | 122, 553 | 124, 554 | 131, 555 | 129, 556 | 89, 557 | 53, 558 | 17, 559 | 33, 560 | 45, 561 | 32, 562 | 47, 563 | 44, 564 | 24, 565 | 19, 566 | 20, 567 | ], 568 | [ 569 | 20, 570 | 23, 571 | 18, 572 | 9, 573 | 6, 574 | 53, 575 | 97, 576 | 193, 577 | 221, 578 | 215, 579 | 200, 580 | 154, 
581 | 130, 582 | 111, 583 | 100, 584 | 93, 585 | 103, 586 | 144, 587 | 129, 588 | 106, 589 | 106, 590 | 69, 591 | 45, 592 | 22, 593 | 25, 594 | 39, 595 | 34, 596 | 50, 597 | 41, 598 | 23, 599 | 21, 600 | 22, 601 | ], 602 | [ 603 | 21, 604 | 23, 605 | 19, 606 | 10, 607 | 5, 608 | 43, 609 | 98, 610 | 178, 611 | 215, 612 | 220, 613 | 188, 614 | 152, 615 | 155, 616 | 124, 617 | 115, 618 | 103, 619 | 109, 620 | 147, 621 | 146, 622 | 136, 623 | 106, 624 | 81, 625 | 53, 626 | 23, 627 | 27, 628 | 27, 629 | 36, 630 | 51, 631 | 37, 632 | 23, 633 | 21, 634 | 22, 635 | ], 636 | [ 637 | 23, 638 | 25, 639 | 21, 640 | 11, 641 | 4, 642 | 28, 643 | 104, 644 | 161, 645 | 197, 646 | 208, 647 | 190, 648 | 180, 649 | 169, 650 | 140, 651 | 134, 652 | 119, 653 | 106, 654 | 139, 655 | 125, 656 | 132, 657 | 115, 658 | 87, 659 | 61, 660 | 23, 661 | 36, 662 | 43, 663 | 38, 664 | 55, 665 | 37, 666 | 25, 667 | 24, 668 | 24, 669 | ], 670 | [ 671 | 23, 672 | 25, 673 | 21, 674 | 13, 675 | 5, 676 | 16, 677 | 87, 678 | 113, 679 | 158, 680 | 188, 681 | 182, 682 | 168, 683 | 166, 684 | 154, 685 | 129, 686 | 123, 687 | 132, 688 | 126, 689 | 160, 690 | 156, 691 | 119, 692 | 107, 693 | 72, 694 | 27, 695 | 35, 696 | 41, 697 | 47, 698 | 59, 699 | 39, 700 | 29, 701 | 28, 702 | 28, 703 | ], 704 | [ 705 | 24, 706 | 24, 707 | 20, 708 | 15, 709 | 7, 710 | 6, 711 | 75, 712 | 128, 713 | 161, 714 | 172, 715 | 175, 716 | 153, 717 | 167, 718 | 169, 719 | 133, 720 | 94, 721 | 154, 722 | 126, 723 | 114, 724 | 97, 725 | 102, 726 | 83, 727 | 75, 728 | 31, 729 | 32, 730 | 39, 731 | 50, 732 | 71, 733 | 42, 734 | 31, 735 | 29, 736 | 29, 737 | ], 738 | [ 739 | 25, 740 | 23, 741 | 19, 742 | 16, 743 | 12, 744 | 3, 745 | 55, 746 | 130, 747 | 164, 748 | 163, 749 | 184, 750 | 190, 751 | 182, 752 | 175, 753 | 168, 754 | 128, 755 | 149, 756 | 132, 757 | 65, 758 | 125, 759 | 133, 760 | 82, 761 | 50, 762 | 35, 763 | 33, 764 | 46, 765 | 56, 766 | 72, 767 | 38, 768 | 30, 769 | 28, 770 | 28, 771 | ], 772 | [ 773 | 25, 774 | 23, 775 | 
19, 776 | 17, 777 | 17, 778 | 9, 779 | 30, 780 | 128, 781 | 167, 782 | 180, 783 | 195, 784 | 175, 785 | 147, 786 | 207, 787 | 182, 788 | 157, 789 | 129, 790 | 107, 791 | 140, 792 | 128, 793 | 157, 794 | 108, 795 | 87, 796 | 34, 797 | 33, 798 | 45, 799 | 57, 800 | 49, 801 | 31, 802 | 28, 803 | 28, 804 | 31, 805 | ], 806 | [ 807 | 25, 808 | 23, 809 | 19, 810 | 19, 811 | 19, 812 | 19, 813 | 22, 814 | 107, 815 | 174, 816 | 168, 817 | 168, 818 | 203, 819 | 147, 820 | 202, 821 | 222, 822 | 166, 823 | 127, 824 | 75, 825 | 84, 826 | 133, 827 | 144, 828 | 114, 829 | 80, 830 | 34, 831 | 40, 832 | 53, 833 | 43, 834 | 30, 835 | 31, 836 | 32, 837 | 31, 838 | 34, 839 | ], 840 | [ 841 | 25, 842 | 21, 843 | 20, 844 | 23, 845 | 28, 846 | 26, 847 | 19, 848 | 80, 849 | 146, 850 | 133, 851 | 210, 852 | 162, 853 | 198, 854 | 151, 855 | 224, 856 | 175, 857 | 128, 858 | 90, 859 | 137, 860 | 173, 861 | 103, 862 | 82, 863 | 56, 864 | 38, 865 | 55, 866 | 61, 867 | 33, 868 | 27, 869 | 36, 870 | 39, 871 | 34, 872 | 34, 873 | ], 874 | [ 875 | 25, 876 | 23, 877 | 25, 878 | 26, 879 | 33, 880 | 38, 881 | 22, 882 | 32, 883 | 142, 884 | 207, 885 | 194, 886 | 184, 887 | 133, 888 | 151, 889 | 215, 890 | 201, 891 | 129, 892 | 68, 893 | 144, 894 | 125, 895 | 104, 896 | 98, 897 | 66, 898 | 56, 899 | 71, 900 | 55, 901 | 38, 902 | 39, 903 | 36, 904 | 36, 905 | 39, 906 | 39, 907 | ], 908 | [ 909 | 26, 910 | 26, 911 | 27, 912 | 25, 913 | 31, 914 | 41, 915 | 40, 916 | 27, 917 | 94, 918 | 206, 919 | 211, 920 | 162, 921 | 179, 922 | 201, 923 | 159, 924 | 210, 925 | 139, 926 | 48, 927 | 99, 928 | 125, 929 | 116, 930 | 86, 931 | 74, 932 | 69, 933 | 56, 934 | 40, 935 | 41, 936 | 34, 937 | 36, 938 | 39, 939 | 40, 940 | 43, 941 | ], 942 | [ 943 | 28, 944 | 27, 945 | 30, 946 | 27, 947 | 30, 948 | 36, 949 | 42, 950 | 43, 951 | 65, 952 | 138, 953 | 202, 954 | 194, 955 | 166, 956 | 175, 957 | 135, 958 | 195, 959 | 157, 960 | 58, 961 | 98, 962 | 110, 963 | 112, 964 | 90, 965 | 80, 966 | 54, 967 | 21, 968 | 24, 969 | 32, 
970 | 40, 971 | 40, 972 | 43, 973 | 42, 974 | 44, 975 | ], 976 | [ 977 | 26, 978 | 27, 979 | 37, 980 | 29, 981 | 30, 982 | 34, 983 | 36, 984 | 43, 985 | 39, 986 | 100, 987 | 198, 988 | 222, 989 | 216, 990 | 208, 991 | 182, 992 | 181, 993 | 172, 994 | 86, 995 | 110, 996 | 130, 997 | 125, 998 | 108, 999 | 101, 1000 | 49, 1001 | 25, 1002 | 30, 1003 | 34, 1004 | 41, 1005 | 44, 1006 | 47, 1007 | 45, 1008 | 45, 1009 | ], 1010 | [ 1011 | 27, 1012 | 28, 1013 | 36, 1014 | 31, 1015 | 33, 1016 | 35, 1017 | 32, 1018 | 36, 1019 | 39, 1020 | 118, 1021 | 233, 1022 | 231, 1023 | 240, 1024 | 212, 1025 | 227, 1026 | 179, 1027 | 119, 1028 | 149, 1029 | 138, 1030 | 141, 1031 | 145, 1032 | 142, 1033 | 131, 1034 | 60, 1035 | 48, 1036 | 49, 1037 | 43, 1038 | 42, 1039 | 45, 1040 | 47, 1041 | 46, 1042 | 46, 1043 | ], 1044 | [ 1045 | 30, 1046 | 35, 1047 | 34, 1048 | 40, 1049 | 42, 1050 | 44, 1051 | 43, 1052 | 56, 1053 | 61, 1054 | 103, 1055 | 241, 1056 | 249, 1057 | 248, 1058 | 230, 1059 | 239, 1060 | 223, 1061 | 138, 1062 | 196, 1063 | 156, 1064 | 163, 1065 | 170, 1066 | 176, 1067 | 152, 1068 | 47, 1069 | 41, 1070 | 56, 1071 | 59, 1072 | 57, 1073 | 52, 1074 | 46, 1075 | 46, 1076 | 47, 1077 | ], 1078 | [ 1079 | 36, 1080 | 45, 1081 | 34, 1082 | 39, 1083 | 52, 1084 | 58, 1085 | 60, 1086 | 54, 1087 | 63, 1088 | 104, 1089 | 219, 1090 | 253, 1091 | 241, 1092 | 241, 1093 | 240, 1094 | 215, 1095 | 169, 1096 | 177, 1097 | 174, 1098 | 214, 1099 | 208, 1100 | 195, 1101 | 167, 1102 | 68, 1103 | 44, 1104 | 58, 1105 | 52, 1106 | 46, 1107 | 48, 1108 | 45, 1109 | 46, 1110 | 49, 1111 | ], 1112 | [ 1113 | 46, 1114 | 51, 1115 | 38, 1116 | 39, 1117 | 46, 1118 | 53, 1119 | 62, 1120 | 75, 1121 | 98, 1122 | 104, 1123 | 137, 1124 | 208, 1125 | 199, 1126 | 181, 1127 | 220, 1128 | 214, 1129 | 180, 1130 | 109, 1131 | 123, 1132 | 241, 1133 | 236, 1134 | 214, 1135 | 163, 1136 | 60, 1137 | 58, 1138 | 48, 1139 | 61, 1140 | 55, 1141 | 49, 1142 | 44, 1143 | 47, 1144 | 50, 1145 | ], 1146 | [ 1147 | 59, 1148 | 51, 1149 | 
42, 1150 | 37, 1151 | 52, 1152 | 63, 1153 | 69, 1154 | 98, 1155 | 95, 1156 | 81, 1157 | 71, 1158 | 109, 1159 | 122, 1160 | 104, 1161 | 121, 1162 | 120, 1163 | 94, 1164 | 50, 1165 | 67, 1166 | 219, 1167 | 248, 1168 | 215, 1169 | 127, 1170 | 66, 1171 | 60, 1172 | 54, 1173 | 41, 1174 | 58, 1175 | 56, 1176 | 41, 1177 | 46, 1178 | 54, 1179 | ], 1180 | [ 1181 | 67, 1182 | 61, 1183 | 54, 1184 | 33, 1185 | 67, 1186 | 87, 1187 | 81, 1188 | 92, 1189 | 78, 1190 | 69, 1191 | 60, 1192 | 102, 1193 | 90, 1194 | 82, 1195 | 73, 1196 | 71, 1197 | 70, 1198 | 57, 1199 | 40, 1200 | 110, 1201 | 187, 1202 | 132, 1203 | 87, 1204 | 80, 1205 | 67, 1206 | 57, 1207 | 48, 1208 | 58, 1209 | 65, 1210 | 44, 1211 | 45, 1212 | 53, 1213 | ], 1214 | [ 1215 | 73, 1216 | 72, 1217 | 51, 1218 | 37, 1219 | 80, 1220 | 86, 1221 | 85, 1222 | 88, 1223 | 63, 1224 | 69, 1225 | 75, 1226 | 87, 1227 | 81, 1228 | 75, 1229 | 74, 1230 | 75, 1231 | 78, 1232 | 67, 1233 | 50, 1234 | 54, 1235 | 69, 1236 | 50, 1237 | 73, 1238 | 79, 1239 | 75, 1240 | 56, 1241 | 62, 1242 | 55, 1243 | 65, 1244 | 55, 1245 | 45, 1246 | 52, 1247 | ], 1248 | [ 1249 | 77, 1250 | 71, 1251 | 40, 1252 | 51, 1253 | 77, 1254 | 73, 1255 | 91, 1256 | 85, 1257 | 54, 1258 | 78, 1259 | 90, 1260 | 71, 1261 | 83, 1262 | 73, 1263 | 75, 1264 | 72, 1265 | 77, 1266 | 74, 1267 | 59, 1268 | 56, 1269 | 66, 1270 | 49, 1271 | 66, 1272 | 74, 1273 | 61, 1274 | 57, 1275 | 68, 1276 | 63, 1277 | 54, 1278 | 61, 1279 | 53, 1280 | 51, 1281 | ], 1282 | [ 1283 | 72, 1284 | 70, 1285 | 56, 1286 | 65, 1287 | 69, 1288 | 77, 1289 | 88, 1290 | 76, 1291 | 56, 1292 | 81, 1293 | 97, 1294 | 68, 1295 | 80, 1296 | 73, 1297 | 72, 1298 | 74, 1299 | 77, 1300 | 66, 1301 | 63, 1302 | 61, 1303 | 65, 1304 | 53, 1305 | 66, 1306 | 69, 1307 | 59, 1308 | 59, 1309 | 61, 1310 | 70, 1311 | 54, 1312 | 64, 1313 | 51, 1314 | 55, 1315 | ], 1316 | [ 1317 | 70, 1318 | 68, 1319 | 63, 1320 | 67, 1321 | 67, 1322 | 70, 1323 | 79, 1324 | 68, 1325 | 66, 1326 | 81, 1327 | 87, 1328 | 69, 1329 | 78, 1330 | 73, 1331 | 
def main():
    """Placeholder entry point; does nothing and returns ``None``."""
    return None
# -*- coding: utf-8 -*-
"""Generate an ISCC-Text for this specification"""
from os import walk, chdir
from os.path import abspath, dirname, join
import iscc
from subprocess import call


# Resolve against the absolute script path: a relative ``__file__`` such as
# "tools/stamp.py" would otherwise collapse to "" and make ``chdir`` fail.
PROJECT_DIR = dirname(dirname(abspath(__file__)))


def _sorted_walk(top):
    """Yield ``(root, files)`` for the tree under *top* in a stable order.

    ``os.walk`` reports entries in filesystem order; sorting keeps the
    concatenated content, and therefore the resulting codes, reproducible.
    """
    for root, dirs, files in walk(top):
        dirs.sort()  # in-place sort controls the order walk descends in
        yield root, sorted(files)


def get_content(mode="text"):
    """Build the site with mkdocs and return its concatenated content.

    :param mode: ``"text"`` returns the joined text of all ``.md`` files under
        ``docs`` as ``str``; ``"data"`` returns the joined raw bytes of every
        file under ``site``. Any other value returns empty ``bytes``.
    :return: ``str`` for text mode, ``bytes`` otherwise.
    """
    chdir(PROJECT_DIR)
    call(["mkdocs", "build"])

    if mode == "text":
        texts = []
        for root, files in _sorted_walk(join(PROJECT_DIR, "docs")):
            for f in files:
                if f.endswith(".md"):
                    with open(join(root, f), "r", encoding="utf-8") as textfile:
                        texts.append(textfile.read())
        return "".join(texts)

    blobs = []
    if mode == "data":
        for root, files in _sorted_walk(join(PROJECT_DIR, "site")):
            for f in files:
                with open(join(root, f), "rb") as datafile:
                    blobs.append(datafile.read())
    return b"".join(blobs)


def _print_iscc(label, title, text, data):
    """Compute the four ISCC components and print them under *label*."""
    mid, title, extra = iscc.meta_id(title)
    cidt = iscc.content_id_text(text)
    did = iscc.data_id(data)
    iid, hash_ = iscc.instance_id(data)
    code = "-".join((mid, cidt, did, iid))
    print(label)
    print("TITLE:", title, extra)
    print("ISCC:", code)
    print("IIDF:", hash_)


def spec_iscc():
    """Print the ISCC of ``docs/specification.md``."""
    path = join(PROJECT_DIR, "docs/specification.md")
    # Read the file twice, as text and as raw bytes, closing each handle.
    with open(path, encoding="utf-8") as textfile:
        text = textfile.read()
    with open(path, "rb") as datafile:
        data = datafile.read()
    _print_iscc("SPEC:", "ISCC - Specification", text, data)


def site_iscc():
    """Print the ISCC of the documentation sources and the built site."""
    text = get_content("text")
    data = get_content("data")
    _print_iscc("SITE:", "ISCC - Content Identifiers", text, data)


if __name__ == "__main__":
    spec_iscc()
    site_iscc()