├── .env.example ├── .envrc ├── .gitignore ├── .vscode └── launch.json ├── LICENSE ├── README.md ├── icon ├── favicon_package_v0.16.zip ├── osm-revert.gif └── osm-revert.xcf ├── osm_revert ├── config.py ├── context_logger.py ├── diff_entry.py ├── diff_match_patch.py ├── dmp_utils.py ├── invert.py ├── main.py ├── osm.py ├── overpass.py └── utils.py ├── pyproject.toml ├── shell.nix ├── uv.lock └── web ├── main.py ├── static ├── css │ ├── bootstrap.css │ ├── bootstrap.css.map │ ├── bootstrap.min.css │ ├── bootstrap.min.css.map │ └── style.css ├── img │ ├── brands │ │ └── openstreetmap.webp │ └── favicon │ │ ├── 256.png │ │ ├── 256.webp │ │ ├── 480.png │ │ └── 480.webp └── js │ └── authorized.js └── templates ├── _base.jinja2 ├── authorized.jinja2 └── index.jinja2 /.env.example: -------------------------------------------------------------------------------- 1 | # if running standalone: 2 | OSM_TOKEN= 3 | # if running web: 4 | OSM_CLIENT= 5 | OSM_SECRET= 6 | -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | # shellcheck disable=SC2148 2 | 3 | use nix 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/dotenv,python,pycharm,visualstudiocode,direnv 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=dotenv,python,pycharm,visualstudiocode,direnv 3 | 4 | ### direnv ### 5 | .direnv 6 | 7 | ### dotenv ### 8 | .env 9 | 10 | ### PyCharm ### 11 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 12 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 13 | 14 | # User-specific stuff 15 | .idea/**/workspace.xml 16 | .idea/**/tasks.xml 17 | .idea/**/usage.statistics.xml 18 | .idea/**/dictionaries 19 | .idea/**/shelf 20 | 21 | # AWS User-specific 22 | .idea/**/aws.xml 23 | 24 | # Generated files 25 | .idea/**/contentModel.xml 26 | 27 | # Sensitive or high-churn files 28 | .idea/**/dataSources/ 29 | .idea/**/dataSources.ids 30 | .idea/**/dataSources.local.xml 31 | .idea/**/sqlDataSources.xml 32 | .idea/**/dynamic.xml 33 | .idea/**/uiDesigner.xml 34 | .idea/**/dbnavigator.xml 35 | 36 | # Gradle 37 | .idea/**/gradle.xml 38 | .idea/**/libraries 39 | 40 | # Gradle and Maven with auto-import 41 | # When using Gradle or Maven with auto-import, you should exclude module files, 42 | # since they will be recreated, and may cause churn. Uncomment if using 43 | # auto-import. 
44 | # .idea/artifacts 45 | # .idea/compiler.xml 46 | # .idea/jarRepositories.xml 47 | # .idea/modules.xml 48 | # .idea/*.iml 49 | # .idea/modules 50 | # *.iml 51 | # *.ipr 52 | 53 | # CMake 54 | cmake-build-*/ 55 | 56 | # Mongo Explorer plugin 57 | .idea/**/mongoSettings.xml 58 | 59 | # File-based project format 60 | *.iws 61 | 62 | # IntelliJ 63 | out/ 64 | 65 | # mpeltonen/sbt-idea plugin 66 | .idea_modules/ 67 | 68 | # JIRA plugin 69 | atlassian-ide-plugin.xml 70 | 71 | # Cursive Clojure plugin 72 | .idea/replstate.xml 73 | 74 | # SonarLint plugin 75 | .idea/sonarlint/ 76 | 77 | # Crashlytics plugin (for Android Studio and IntelliJ) 78 | com_crashlytics_export_strings.xml 79 | crashlytics.properties 80 | crashlytics-build.properties 81 | fabric.properties 82 | 83 | # Editor-based Rest Client 84 | .idea/httpRequests 85 | 86 | # Android studio 3.1+ serialized cache file 87 | .idea/caches/build_file_checksums.ser 88 | 89 | ### PyCharm Patch ### 90 | # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 91 | 92 | # *.iml 93 | # modules.xml 94 | # .idea/misc.xml 95 | # *.ipr 96 | 97 | # Sonarlint plugin 98 | # https://plugins.jetbrains.com/plugin/7973-sonarlint 99 | .idea/**/sonarlint/ 100 | 101 | # SonarQube Plugin 102 | # https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin 103 | .idea/**/sonarIssues.xml 104 | 105 | # Markdown Navigator plugin 106 | # https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced 107 | .idea/**/markdown-navigator.xml 108 | .idea/**/markdown-navigator-enh.xml 109 | .idea/**/markdown-navigator/ 110 | 111 | # Cache file creation bug 112 | # See https://youtrack.jetbrains.com/issue/JBR-2257 113 | .idea/$CACHE_FILE$ 114 | 115 | # CodeStream plugin 116 | # https://plugins.jetbrains.com/plugin/12206-codestream 117 | .idea/codestream.xml 118 | 119 | # Azure Toolkit for IntelliJ plugin 120 | # https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij 121 | .idea/**/azureSettings.xml 122 | 123 | ### Python ### 124 | # Byte-compiled / optimized / DLL files 125 | __pycache__/ 126 | *.py[cod] 127 | *$py.class 128 | 129 | # C extensions 130 | *.so 131 | 132 | # Distribution / packaging 133 | .Python 134 | build/ 135 | develop-eggs/ 136 | dist/ 137 | downloads/ 138 | eggs/ 139 | .eggs/ 140 | lib/ 141 | lib64/ 142 | parts/ 143 | sdist/ 144 | var/ 145 | wheels/ 146 | share/python-wheels/ 147 | *.egg-info/ 148 | .installed.cfg 149 | *.egg 150 | MANIFEST 151 | 152 | # PyInstaller 153 | # Usually these files are written by a python script from a template 154 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
155 | *.manifest 156 | *.spec 157 | 158 | # Installer logs 159 | pip-log.txt 160 | pip-delete-this-directory.txt 161 | 162 | # Unit test / coverage reports 163 | htmlcov/ 164 | .tox/ 165 | .nox/ 166 | .coverage 167 | .coverage.* 168 | .cache 169 | nosetests.xml 170 | coverage.xml 171 | *.cover 172 | *.py,cover 173 | .hypothesis/ 174 | .pytest_cache/ 175 | cover/ 176 | 177 | # Translations 178 | *.mo 179 | *.pot 180 | 181 | # Django stuff: 182 | *.log 183 | local_settings.py 184 | db.sqlite3 185 | db.sqlite3-journal 186 | 187 | # Flask stuff: 188 | instance/ 189 | .webassets-cache 190 | 191 | # Scrapy stuff: 192 | .scrapy 193 | 194 | # Sphinx documentation 195 | docs/_build/ 196 | 197 | # PyBuilder 198 | .pybuilder/ 199 | target/ 200 | 201 | # Jupyter Notebook 202 | .ipynb_checkpoints 203 | 204 | # IPython 205 | profile_default/ 206 | ipython_config.py 207 | 208 | # pyenv 209 | # For a library or package, you might want to ignore these files since the code is 210 | # intended to run in multiple environments; otherwise, check them in: 211 | # .python-version 212 | 213 | # pipenv 214 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 215 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 216 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 217 | # install all needed dependencies. 218 | #Pipfile.lock 219 | 220 | # poetry 221 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 222 | # This is especially recommended for binary packages to ensure reproducibility, and is more 223 | # commonly ignored for libraries. 224 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 225 | #poetry.lock 226 | 227 | # pdm 228 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 229 | #pdm.lock 230 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 231 | # in version control. 232 | # https://pdm.fming.dev/#use-with-ide 233 | .pdm.toml 234 | 235 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 236 | __pypackages__/ 237 | 238 | # Celery stuff 239 | celerybeat-schedule 240 | celerybeat.pid 241 | 242 | # SageMath parsed files 243 | *.sage.py 244 | 245 | # Environments 246 | .venv 247 | env/ 248 | venv/ 249 | ENV/ 250 | env.bak/ 251 | venv.bak/ 252 | 253 | # Spyder project settings 254 | .spyderproject 255 | .spyproject 256 | 257 | # Rope project settings 258 | .ropeproject 259 | 260 | # mkdocs documentation 261 | /site 262 | 263 | # mypy 264 | .mypy_cache/ 265 | .dmypy.json 266 | dmypy.json 267 | 268 | # Pyre type checker 269 | .pyre/ 270 | 271 | # pytype static type analyzer 272 | .pytype/ 273 | 274 | # Cython debug symbols 275 | cython_debug/ 276 | 277 | # PyCharm 278 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 279 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 280 | # and can be added to the global gitignore or merged into this file. For a more nuclear 281 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
282 | #.idea/ 283 | 284 | ### Python Patch ### 285 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 286 | poetry.toml 287 | 288 | # ruff 289 | .ruff_cache/ 290 | 291 | # LSP config files 292 | pyrightconfig.json 293 | 294 | ### VisualStudioCode ### 295 | .vscode/* 296 | !.vscode/settings.json 297 | !.vscode/tasks.json 298 | !.vscode/launch.json 299 | !.vscode/extensions.json 300 | !.vscode/*.code-snippets 301 | 302 | # Local History for Visual Studio Code 303 | .history/ 304 | 305 | # Built Visual Studio Code Extensions 306 | *.vsix 307 | 308 | ### VisualStudioCode Patch ### 309 | # Ignore all local history of files 310 | .history 311 | .ionide 312 | 313 | # End of https://www.toptal.com/developers/gitignore/api/dotenv,python,pycharm,visualstudiocode,direnv 314 | 315 | web/static/css/style.*.css 316 | web/static/js/authorized.*.js 317 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "configurations": [ 3 | { 4 | "console": "integratedTerminal", 5 | "justMyCode": true, 6 | "name": "Python: main.py", 7 | "program": "${workspaceFolder}/osm_revert/main.py", 8 | "request": "launch", 9 | "type": "debugpy" 10 | }, 11 | { 12 | "args": [ 13 | "--reload", 14 | "web.main:app" 15 | ], 16 | "jinja": true, 17 | "justMyCode": true, 18 | "module": "uvicorn", 19 | "name": "Python: FastAPI", 20 | "request": "launch", 21 | "type": "debugpy" 22 | } 23 | ], 24 | "version": "0.2.0" 25 | } 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU AFFERO GENERAL PUBLIC LICENSE 2 | Version 3, 19 November 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU Affero General Public License is a free, copyleft license for 11 | software and other kinds of works, specifically designed to ensure 12 | cooperation with the community in the case of network server software. 13 | 14 | The licenses for most software and other practical works are designed 15 | to take away your freedom to share and change the works. By contrast, 16 | our General Public Licenses are intended to guarantee your freedom to 17 | share and change all versions of a program--to make sure it remains free 18 | software for all its users. 19 | 20 | When we speak of free software, we are referring to freedom, not 21 | price. Our General Public Licenses are designed to make sure that you 22 | have the freedom to distribute copies of free software (and charge for 23 | them if you wish), that you receive source code or can get it if you 24 | want it, that you can change the software or use pieces of it in new 25 | free programs, and that you know you can do these things. 26 | 27 | Developers that use our General Public Licenses protect your rights 28 | with two steps: (1) assert copyright on the software, and (2) offer 29 | you this License which gives you legal permission to copy, distribute 30 | and/or modify the software. 31 | 32 | A secondary benefit of defending all users' freedom is that 33 | improvements made in alternate versions of the program, if they 34 | receive widespread use, become available for other developers to 35 | incorporate. 
Many developers of free software are heartened and 36 | encouraged by the resulting cooperation. However, in the case of 37 | software used on network servers, this result may fail to come about. 38 | The GNU General Public License permits making a modified version and 39 | letting the public access it on a server without ever releasing its 40 | source code to the public. 41 | 42 | The GNU Affero General Public License is designed specifically to 43 | ensure that, in such cases, the modified source code becomes available 44 | to the community. It requires the operator of a network server to 45 | provide the source code of the modified version running there to the 46 | users of that server. Therefore, public use of a modified version, on 47 | a publicly accessible server, gives the public access to the source 48 | code of the modified version. 49 | 50 | An older license, called the Affero General Public License and 51 | published by Affero, was designed to accomplish similar goals. This is 52 | a different license, not a version of the Affero GPL, but Affero has 53 | released a new version of the Affero GPL which permits relicensing under 54 | this license. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | TERMS AND CONDITIONS 60 | 61 | 0. Definitions. 62 | 63 | "This License" refers to version 3 of the GNU Affero General Public License. 64 | 65 | "Copyright" also means copyright-like laws that apply to other kinds of 66 | works, such as semiconductor masks. 67 | 68 | "The Program" refers to any copyrightable work licensed under this 69 | License. Each licensee is addressed as "you". "Licensees" and 70 | "recipients" may be individuals or organizations. 71 | 72 | To "modify" a work means to copy from or adapt all or part of the work 73 | in a fashion requiring copyright permission, other than the making of an 74 | exact copy. The resulting work is called a "modified version" of the 75 | earlier work or a work "based on" the earlier work. 76 | 77 | A "covered work" means either the unmodified Program or a work based 78 | on the Program. 79 | 80 | To "propagate" a work means to do anything with it that, without 81 | permission, would make you directly or secondarily liable for 82 | infringement under applicable copyright law, except executing it on a 83 | computer or modifying a private copy. Propagation includes copying, 84 | distribution (with or without modification), making available to the 85 | public, and in some countries other activities as well. 86 | 87 | To "convey" a work means any kind of propagation that enables other 88 | parties to make or receive copies. Mere interaction with a user through 89 | a computer network, with no transfer of a copy, is not conveying. 90 | 91 | An interactive user interface displays "Appropriate Legal Notices" 92 | to the extent that it includes a convenient and prominently visible 93 | feature that (1) displays an appropriate copyright notice, and (2) 94 | tells the user that there is no warranty for the work (except to the 95 | extent that warranties are provided), that licensees may convey the 96 | work under this License, and how to view a copy of this License. If 97 | the interface presents a list of user commands or options, such as a 98 | menu, a prominent item in the list meets this criterion. 99 | 100 | 1. Source Code. 101 | 102 | The "source code" for a work means the preferred form of the work 103 | for making modifications to it. 
"Object code" means any non-source 104 | form of a work. 105 | 106 | A "Standard Interface" means an interface that either is an official 107 | standard defined by a recognized standards body, or, in the case of 108 | interfaces specified for a particular programming language, one that 109 | is widely used among developers working in that language. 110 | 111 | The "System Libraries" of an executable work include anything, other 112 | than the work as a whole, that (a) is included in the normal form of 113 | packaging a Major Component, but which is not part of that Major 114 | Component, and (b) serves only to enable use of the work with that 115 | Major Component, or to implement a Standard Interface for which an 116 | implementation is available to the public in source code form. A 117 | "Major Component", in this context, means a major essential component 118 | (kernel, window system, and so on) of the specific operating system 119 | (if any) on which the executable work runs, or a compiler used to 120 | produce the work, or an object code interpreter used to run it. 121 | 122 | The "Corresponding Source" for a work in object code form means all 123 | the source code needed to generate, install, and (for an executable 124 | work) run the object code and to modify the work, including scripts to 125 | control those activities. However, it does not include the work's 126 | System Libraries, or general-purpose tools or generally available free 127 | programs which are used unmodified in performing those activities but 128 | which are not part of the work. For example, Corresponding Source 129 | includes interface definition files associated with source files for 130 | the work, and the source code for shared libraries and dynamically 131 | linked subprograms that the work is specifically designed to require, 132 | such as by intimate data communication or control flow between those 133 | subprograms and other parts of the work. 134 | 135 | The Corresponding Source need not include anything that users 136 | can regenerate automatically from other parts of the Corresponding 137 | Source. 138 | 139 | The Corresponding Source for a work in source code form is that 140 | same work. 141 | 142 | 2. Basic Permissions. 143 | 144 | All rights granted under this License are granted for the term of 145 | copyright on the Program, and are irrevocable provided the stated 146 | conditions are met. This License explicitly affirms your unlimited 147 | permission to run the unmodified Program. The output from running a 148 | covered work is covered by this License only if the output, given its 149 | content, constitutes a covered work. This License acknowledges your 150 | rights of fair use or other equivalent, as provided by copyright law. 151 | 152 | You may make, run and propagate covered works that you do not 153 | convey, without conditions so long as your license otherwise remains 154 | in force. You may convey covered works to others for the sole purpose 155 | of having them make modifications exclusively for you, or provide you 156 | with facilities for running those works, provided that you comply with 157 | the terms of this License in conveying all material for which you do 158 | not control copyright. Those thus making or running the covered works 159 | for you must do so exclusively on your behalf, under your direction 160 | and control, on terms that prohibit them from making any copies of 161 | your copyrighted material outside their relationship with you. 
162 | 163 | Conveying under any other circumstances is permitted solely under 164 | the conditions stated below. Sublicensing is not allowed; section 10 165 | makes it unnecessary. 166 | 167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 168 | 169 | No covered work shall be deemed part of an effective technological 170 | measure under any applicable law fulfilling obligations under article 171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 172 | similar laws prohibiting or restricting circumvention of such 173 | measures. 174 | 175 | When you convey a covered work, you waive any legal power to forbid 176 | circumvention of technological measures to the extent such circumvention 177 | is effected by exercising rights under this License with respect to 178 | the covered work, and you disclaim any intention to limit operation or 179 | modification of the work as a means of enforcing, against the work's 180 | users, your or third parties' legal rights to forbid circumvention of 181 | technological measures. 182 | 183 | 4. Conveying Verbatim Copies. 184 | 185 | You may convey verbatim copies of the Program's source code as you 186 | receive it, in any medium, provided that you conspicuously and 187 | appropriately publish on each copy an appropriate copyright notice; 188 | keep intact all notices stating that this License and any 189 | non-permissive terms added in accord with section 7 apply to the code; 190 | keep intact all notices of the absence of any warranty; and give all 191 | recipients a copy of this License along with the Program. 192 | 193 | You may charge any price or no price for each copy that you convey, 194 | and you may offer support or warranty protection for a fee. 195 | 196 | 5. Conveying Modified Source Versions. 197 | 198 | You may convey a work based on the Program, or the modifications to 199 | produce it from the Program, in the form of source code under the 200 | terms of section 4, provided that you also meet all of these conditions: 201 | 202 | a) The work must carry prominent notices stating that you modified 203 | it, and giving a relevant date. 204 | 205 | b) The work must carry prominent notices stating that it is 206 | released under this License and any conditions added under section 207 | 7. This requirement modifies the requirement in section 4 to 208 | "keep intact all notices". 209 | 210 | c) You must license the entire work, as a whole, under this 211 | License to anyone who comes into possession of a copy. This 212 | License will therefore apply, along with any applicable section 7 213 | additional terms, to the whole of the work, and all its parts, 214 | regardless of how they are packaged. This License gives no 215 | permission to license the work in any other way, but it does not 216 | invalidate such permission if you have separately received it. 217 | 218 | d) If the work has interactive user interfaces, each must display 219 | Appropriate Legal Notices; however, if the Program has interactive 220 | interfaces that do not display Appropriate Legal Notices, your 221 | work need not make them do so. 
222 | 223 | A compilation of a covered work with other separate and independent 224 | works, which are not by their nature extensions of the covered work, 225 | and which are not combined with it such as to form a larger program, 226 | in or on a volume of a storage or distribution medium, is called an 227 | "aggregate" if the compilation and its resulting copyright are not 228 | used to limit the access or legal rights of the compilation's users 229 | beyond what the individual works permit. Inclusion of a covered work 230 | in an aggregate does not cause this License to apply to the other 231 | parts of the aggregate. 232 | 233 | 6. Conveying Non-Source Forms. 234 | 235 | You may convey a covered work in object code form under the terms 236 | of sections 4 and 5, provided that you also convey the 237 | machine-readable Corresponding Source under the terms of this License, 238 | in one of these ways: 239 | 240 | a) Convey the object code in, or embodied in, a physical product 241 | (including a physical distribution medium), accompanied by the 242 | Corresponding Source fixed on a durable physical medium 243 | customarily used for software interchange. 244 | 245 | b) Convey the object code in, or embodied in, a physical product 246 | (including a physical distribution medium), accompanied by a 247 | written offer, valid for at least three years and valid for as 248 | long as you offer spare parts or customer support for that product 249 | model, to give anyone who possesses the object code either (1) a 250 | copy of the Corresponding Source for all the software in the 251 | product that is covered by this License, on a durable physical 252 | medium customarily used for software interchange, for a price no 253 | more than your reasonable cost of physically performing this 254 | conveying of source, or (2) access to copy the 255 | Corresponding Source from a network server at no charge. 256 | 257 | c) Convey individual copies of the object code with a copy of the 258 | written offer to provide the Corresponding Source. This 259 | alternative is allowed only occasionally and noncommercially, and 260 | only if you received the object code with such an offer, in accord 261 | with subsection 6b. 262 | 263 | d) Convey the object code by offering access from a designated 264 | place (gratis or for a charge), and offer equivalent access to the 265 | Corresponding Source in the same way through the same place at no 266 | further charge. You need not require recipients to copy the 267 | Corresponding Source along with the object code. If the place to 268 | copy the object code is a network server, the Corresponding Source 269 | may be on a different server (operated by you or a third party) 270 | that supports equivalent copying facilities, provided you maintain 271 | clear directions next to the object code saying where to find the 272 | Corresponding Source. Regardless of what server hosts the 273 | Corresponding Source, you remain obligated to ensure that it is 274 | available for as long as needed to satisfy these requirements. 275 | 276 | e) Convey the object code using peer-to-peer transmission, provided 277 | you inform other peers where the object code and Corresponding 278 | Source of the work are being offered to the general public at no 279 | charge under subsection 6d. 280 | 281 | A separable portion of the object code, whose source code is excluded 282 | from the Corresponding Source as a System Library, need not be 283 | included in conveying the object code work. 
284 | 285 | A "User Product" is either (1) a "consumer product", which means any 286 | tangible personal property which is normally used for personal, family, 287 | or household purposes, or (2) anything designed or sold for incorporation 288 | into a dwelling. In determining whether a product is a consumer product, 289 | doubtful cases shall be resolved in favor of coverage. For a particular 290 | product received by a particular user, "normally used" refers to a 291 | typical or common use of that class of product, regardless of the status 292 | of the particular user or of the way in which the particular user 293 | actually uses, or expects or is expected to use, the product. A product 294 | is a consumer product regardless of whether the product has substantial 295 | commercial, industrial or non-consumer uses, unless such uses represent 296 | the only significant mode of use of the product. 297 | 298 | "Installation Information" for a User Product means any methods, 299 | procedures, authorization keys, or other information required to install 300 | and execute modified versions of a covered work in that User Product from 301 | a modified version of its Corresponding Source. The information must 302 | suffice to ensure that the continued functioning of the modified object 303 | code is in no case prevented or interfered with solely because 304 | modification has been made. 305 | 306 | If you convey an object code work under this section in, or with, or 307 | specifically for use in, a User Product, and the conveying occurs as 308 | part of a transaction in which the right of possession and use of the 309 | User Product is transferred to the recipient in perpetuity or for a 310 | fixed term (regardless of how the transaction is characterized), the 311 | Corresponding Source conveyed under this section must be accompanied 312 | by the Installation Information. But this requirement does not apply 313 | if neither you nor any third party retains the ability to install 314 | modified object code on the User Product (for example, the work has 315 | been installed in ROM). 316 | 317 | The requirement to provide Installation Information does not include a 318 | requirement to continue to provide support service, warranty, or updates 319 | for a work that has been modified or installed by the recipient, or for 320 | the User Product in which it has been modified or installed. Access to a 321 | network may be denied when the modification itself materially and 322 | adversely affects the operation of the network or violates the rules and 323 | protocols for communication across the network. 324 | 325 | Corresponding Source conveyed, and Installation Information provided, 326 | in accord with this section must be in a format that is publicly 327 | documented (and with an implementation available to the public in 328 | source code form), and must require no special password or key for 329 | unpacking, reading or copying. 330 | 331 | 7. Additional Terms. 332 | 333 | "Additional permissions" are terms that supplement the terms of this 334 | License by making exceptions from one or more of its conditions. 335 | Additional permissions that are applicable to the entire Program shall 336 | be treated as though they were included in this License, to the extent 337 | that they are valid under applicable law. 
If additional permissions 338 | apply only to part of the Program, that part may be used separately 339 | under those permissions, but the entire Program remains governed by 340 | this License without regard to the additional permissions. 341 | 342 | When you convey a copy of a covered work, you may at your option 343 | remove any additional permissions from that copy, or from any part of 344 | it. (Additional permissions may be written to require their own 345 | removal in certain cases when you modify the work.) You may place 346 | additional permissions on material, added by you to a covered work, 347 | for which you have or can give appropriate copyright permission. 348 | 349 | Notwithstanding any other provision of this License, for material you 350 | add to a covered work, you may (if authorized by the copyright holders of 351 | that material) supplement the terms of this License with terms: 352 | 353 | a) Disclaiming warranty or limiting liability differently from the 354 | terms of sections 15 and 16 of this License; or 355 | 356 | b) Requiring preservation of specified reasonable legal notices or 357 | author attributions in that material or in the Appropriate Legal 358 | Notices displayed by works containing it; or 359 | 360 | c) Prohibiting misrepresentation of the origin of that material, or 361 | requiring that modified versions of such material be marked in 362 | reasonable ways as different from the original version; or 363 | 364 | d) Limiting the use for publicity purposes of names of licensors or 365 | authors of the material; or 366 | 367 | e) Declining to grant rights under trademark law for use of some 368 | trade names, trademarks, or service marks; or 369 | 370 | f) Requiring indemnification of licensors and authors of that 371 | material by anyone who conveys the material (or modified versions of 372 | it) with contractual assumptions of liability to the recipient, for 373 | any liability that these contractual assumptions directly impose on 374 | those licensors and authors. 375 | 376 | All other non-permissive additional terms are considered "further 377 | restrictions" within the meaning of section 10. If the Program as you 378 | received it, or any part of it, contains a notice stating that it is 379 | governed by this License along with a term that is a further 380 | restriction, you may remove that term. If a license document contains 381 | a further restriction but permits relicensing or conveying under this 382 | License, you may add to a covered work material governed by the terms 383 | of that license document, provided that the further restriction does 384 | not survive such relicensing or conveying. 385 | 386 | If you add terms to a covered work in accord with this section, you 387 | must place, in the relevant source files, a statement of the 388 | additional terms that apply to those files, or a notice indicating 389 | where to find the applicable terms. 390 | 391 | Additional terms, permissive or non-permissive, may be stated in the 392 | form of a separately written license, or stated as exceptions; 393 | the above requirements apply either way. 394 | 395 | 8. Termination. 396 | 397 | You may not propagate or modify a covered work except as expressly 398 | provided under this License. Any attempt otherwise to propagate or 399 | modify it is void, and will automatically terminate your rights under 400 | this License (including any patent licenses granted under the third 401 | paragraph of section 11). 
402 | 403 | However, if you cease all violation of this License, then your 404 | license from a particular copyright holder is reinstated (a) 405 | provisionally, unless and until the copyright holder explicitly and 406 | finally terminates your license, and (b) permanently, if the copyright 407 | holder fails to notify you of the violation by some reasonable means 408 | prior to 60 days after the cessation. 409 | 410 | Moreover, your license from a particular copyright holder is 411 | reinstated permanently if the copyright holder notifies you of the 412 | violation by some reasonable means, this is the first time you have 413 | received notice of violation of this License (for any work) from that 414 | copyright holder, and you cure the violation prior to 30 days after 415 | your receipt of the notice. 416 | 417 | Termination of your rights under this section does not terminate the 418 | licenses of parties who have received copies or rights from you under 419 | this License. If your rights have been terminated and not permanently 420 | reinstated, you do not qualify to receive new licenses for the same 421 | material under section 10. 422 | 423 | 9. Acceptance Not Required for Having Copies. 424 | 425 | You are not required to accept this License in order to receive or 426 | run a copy of the Program. Ancillary propagation of a covered work 427 | occurring solely as a consequence of using peer-to-peer transmission 428 | to receive a copy likewise does not require acceptance. However, 429 | nothing other than this License grants you permission to propagate or 430 | modify any covered work. These actions infringe copyright if you do 431 | not accept this License. Therefore, by modifying or propagating a 432 | covered work, you indicate your acceptance of this License to do so. 433 | 434 | 10. Automatic Licensing of Downstream Recipients. 435 | 436 | Each time you convey a covered work, the recipient automatically 437 | receives a license from the original licensors, to run, modify and 438 | propagate that work, subject to this License. You are not responsible 439 | for enforcing compliance by third parties with this License. 440 | 441 | An "entity transaction" is a transaction transferring control of an 442 | organization, or substantially all assets of one, or subdividing an 443 | organization, or merging organizations. If propagation of a covered 444 | work results from an entity transaction, each party to that 445 | transaction who receives a copy of the work also receives whatever 446 | licenses to the work the party's predecessor in interest had or could 447 | give under the previous paragraph, plus a right to possession of the 448 | Corresponding Source of the work from the predecessor in interest, if 449 | the predecessor has it or can get it with reasonable efforts. 450 | 451 | You may not impose any further restrictions on the exercise of the 452 | rights granted or affirmed under this License. For example, you may 453 | not impose a license fee, royalty, or other charge for exercise of 454 | rights granted under this License, and you may not initiate litigation 455 | (including a cross-claim or counterclaim in a lawsuit) alleging that 456 | any patent claim is infringed by making, using, selling, offering for 457 | sale, or importing the Program or any portion of it. 458 | 459 | 11. Patents. 460 | 461 | A "contributor" is a copyright holder who authorizes use under this 462 | License of the Program or a work on which the Program is based. 
The 463 | work thus licensed is called the contributor's "contributor version". 464 | 465 | A contributor's "essential patent claims" are all patent claims 466 | owned or controlled by the contributor, whether already acquired or 467 | hereafter acquired, that would be infringed by some manner, permitted 468 | by this License, of making, using, or selling its contributor version, 469 | but do not include claims that would be infringed only as a 470 | consequence of further modification of the contributor version. For 471 | purposes of this definition, "control" includes the right to grant 472 | patent sublicenses in a manner consistent with the requirements of 473 | this License. 474 | 475 | Each contributor grants you a non-exclusive, worldwide, royalty-free 476 | patent license under the contributor's essential patent claims, to 477 | make, use, sell, offer for sale, import and otherwise run, modify and 478 | propagate the contents of its contributor version. 479 | 480 | In the following three paragraphs, a "patent license" is any express 481 | agreement or commitment, however denominated, not to enforce a patent 482 | (such as an express permission to practice a patent or covenant not to 483 | sue for patent infringement). To "grant" such a patent license to a 484 | party means to make such an agreement or commitment not to enforce a 485 | patent against the party. 486 | 487 | If you convey a covered work, knowingly relying on a patent license, 488 | and the Corresponding Source of the work is not available for anyone 489 | to copy, free of charge and under the terms of this License, through a 490 | publicly available network server or other readily accessible means, 491 | then you must either (1) cause the Corresponding Source to be so 492 | available, or (2) arrange to deprive yourself of the benefit of the 493 | patent license for this particular work, or (3) arrange, in a manner 494 | consistent with the requirements of this License, to extend the patent 495 | license to downstream recipients. "Knowingly relying" means you have 496 | actual knowledge that, but for the patent license, your conveying the 497 | covered work in a country, or your recipient's use of the covered work 498 | in a country, would infringe one or more identifiable patents in that 499 | country that you have reason to believe are valid. 500 | 501 | If, pursuant to or in connection with a single transaction or 502 | arrangement, you convey, or propagate by procuring conveyance of, a 503 | covered work, and grant a patent license to some of the parties 504 | receiving the covered work authorizing them to use, propagate, modify 505 | or convey a specific copy of the covered work, then the patent license 506 | you grant is automatically extended to all recipients of the covered 507 | work and works based on it. 508 | 509 | A patent license is "discriminatory" if it does not include within 510 | the scope of its coverage, prohibits the exercise of, or is 511 | conditioned on the non-exercise of one or more of the rights that are 512 | specifically granted under this License. 
You may not convey a covered 513 | work if you are a party to an arrangement with a third party that is 514 | in the business of distributing software, under which you make payment 515 | to the third party based on the extent of your activity of conveying 516 | the work, and under which the third party grants, to any of the 517 | parties who would receive the covered work from you, a discriminatory 518 | patent license (a) in connection with copies of the covered work 519 | conveyed by you (or copies made from those copies), or (b) primarily 520 | for and in connection with specific products or compilations that 521 | contain the covered work, unless you entered into that arrangement, 522 | or that patent license was granted, prior to 28 March 2007. 523 | 524 | Nothing in this License shall be construed as excluding or limiting 525 | any implied license or other defenses to infringement that may 526 | otherwise be available to you under applicable patent law. 527 | 528 | 12. No Surrender of Others' Freedom. 529 | 530 | If conditions are imposed on you (whether by court order, agreement or 531 | otherwise) that contradict the conditions of this License, they do not 532 | excuse you from the conditions of this License. If you cannot convey a 533 | covered work so as to satisfy simultaneously your obligations under this 534 | License and any other pertinent obligations, then as a consequence you may 535 | not convey it at all. For example, if you agree to terms that obligate you 536 | to collect a royalty for further conveying from those to whom you convey 537 | the Program, the only way you could satisfy both those terms and this 538 | License would be to refrain entirely from conveying the Program. 539 | 540 | 13. Remote Network Interaction; Use with the GNU General Public License. 541 | 542 | Notwithstanding any other provision of this License, if you modify the 543 | Program, your modified version must prominently offer all users 544 | interacting with it remotely through a computer network (if your version 545 | supports such interaction) an opportunity to receive the Corresponding 546 | Source of your version by providing access to the Corresponding Source 547 | from a network server at no charge, through some standard or customary 548 | means of facilitating copying of software. This Corresponding Source 549 | shall include the Corresponding Source for any work covered by version 3 550 | of the GNU General Public License that is incorporated pursuant to the 551 | following paragraph. 552 | 553 | Notwithstanding any other provision of this License, you have 554 | permission to link or combine any covered work with a work licensed 555 | under version 3 of the GNU General Public License into a single 556 | combined work, and to convey the resulting work. The terms of this 557 | License will continue to apply to the part which is the covered work, 558 | but the work with which it is combined will remain governed by version 559 | 3 of the GNU General Public License. 560 | 561 | 14. Revised Versions of this License. 562 | 563 | The Free Software Foundation may publish revised and/or new versions of 564 | the GNU Affero General Public License from time to time. Such new versions 565 | will be similar in spirit to the present version, but may differ in detail to 566 | address new problems or concerns. 567 | 568 | Each version is given a distinguishing version number. 
If the 569 | Program specifies that a certain numbered version of the GNU Affero General 570 | Public License "or any later version" applies to it, you have the 571 | option of following the terms and conditions either of that numbered 572 | version or of any later version published by the Free Software 573 | Foundation. If the Program does not specify a version number of the 574 | GNU Affero General Public License, you may choose any version ever published 575 | by the Free Software Foundation. 576 | 577 | If the Program specifies that a proxy can decide which future 578 | versions of the GNU Affero General Public License can be used, that proxy's 579 | public statement of acceptance of a version permanently authorizes you 580 | to choose that version for the Program. 581 | 582 | Later license versions may give you additional or different 583 | permissions. However, no additional obligations are imposed on any 584 | author or copyright holder as a result of your choosing to follow a 585 | later version. 586 | 587 | 15. Disclaimer of Warranty. 588 | 589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 597 | 598 | 16. Limitation of Liability. 599 | 600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 608 | SUCH DAMAGES. 609 | 610 | 17. Interpretation of Sections 15 and 16. 611 | 612 | If the disclaimer of warranty and limitation of liability provided 613 | above cannot be given local legal effect according to their terms, 614 | reviewing courts shall apply local law that most closely approximates 615 | an absolute waiver of all civil liability in connection with the 616 | Program, unless a warranty or assumption of liability accompanies a 617 | copy of the Program in return for a fee. 618 | 619 | END OF TERMS AND CONDITIONS 620 | 621 | How to Apply These Terms to Your New Programs 622 | 623 | If you develop a new program, and you want it to be of the greatest 624 | possible use to the public, the best way to achieve this is to make it 625 | free software which everyone can redistribute and change under these terms. 626 | 627 | To do so, attach the following notices to the program. It is safest 628 | to attach them to the start of each source file to most effectively 629 | state the exclusion of warranty; and each file should have at least 630 | the "copyright" line and a pointer to where the full notice is found. 
631 | 632 | 633 | Copyright (C) 634 | 635 | This program is free software: you can redistribute it and/or modify 636 | it under the terms of the GNU Affero General Public License as published 637 | by the Free Software Foundation, either version 3 of the License, or 638 | (at your option) any later version. 639 | 640 | This program is distributed in the hope that it will be useful, 641 | but WITHOUT ANY WARRANTY; without even the implied warranty of 642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 643 | GNU Affero General Public License for more details. 644 | 645 | You should have received a copy of the GNU Affero General Public License 646 | along with this program. If not, see . 647 | 648 | Also add information on how to contact you by electronic and paper mail. 649 | 650 | If your software can interact with users remotely through a computer 651 | network, you should also make sure that it provides a way for users to 652 | get its source. For example, if your program is a web application, its 653 | interface could display a "Source" link that leads users to an archive 654 | of the code. There are many ways you could offer source, and different 655 | solutions will be better for different programs; see section 13 for the 656 | specific requirements. 657 | 658 | You should also get your employer (if you work as a programmer) or school, 659 | if any, to sign a "copyright disclaimer" for the program, if necessary. 660 | For more information on this, and how to apply and follow the GNU AGPL, see 661 | . 662 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🗺️ osm-revert 2 | 3 | ![Python version](https://shields.monicz.dev/badge/python-v3.13-blue) 4 | [![Liberapay Patrons](https://shields.monicz.dev/liberapay/patrons/Zaczero?logo=liberapay&label=Patrons)](https://liberapay.com/Zaczero/) 5 | [![GitHub Sponsors](https://shields.monicz.dev/github/sponsors/Zaczero?logo=github&label=Sponsors&color=%23db61a2)](https://github.com/sponsors/Zaczero) 6 | [![GitHub Stars](https://shields.monicz.dev/github/stars/Zaczero/osm-revert?style=social)](https://github.com/Zaczero/osm-revert) 7 | 8 | The next generation OpenStreetMap changeset revert tool. 9 | 10 | You can access the **official instance** of osm-revert at [revert.monicz.dev](https://revert.monicz.dev). 11 | 12 |
13 | <details><summary>Demo (GIF)</summary> 14 | <img src="icon/osm-revert.gif" alt="Tool demo"> 15 | </details>
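
For running the tool yourself, the repository's `.env.example` and `.vscode/launch.json` hint at the workflow below. This is a minimal sketch inferred from those files rather than documented setup instructions, and it assumes the variables from `.env` are actually exported into the environment (e.g. via direnv and `shell.nix`).

```sh
# Sketch inferred from .env.example and .vscode/launch.json — adjust to your setup.
cp .env.example .env   # fill in OSM_TOKEN (standalone) or OSM_CLIENT/OSM_SECRET (web)

# Standalone revert script:
python osm_revert/main.py

# Web interface (FastAPI app served by uvicorn):
uvicorn web.main:app --reload
```
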
16 | 17 | ## Highlights 18 | 19 | ### ⚡️ Faster 20 | 21 | This reverter uses [Overpass](https://overpass-api.de) to reduce the amount of API calls. 22 | 23 | ### 🧠 Smarter 24 | 25 | This reverter uses [Google's diff-match-patch](https://github.com/google/diff-match-patch) library to automatically resolve conflicts. 26 | 27 | ### ♾️ Limitless 28 | 29 | This reverter has no arbitrary limits on the changeset size. 30 | 31 | ## Footer 32 | 33 | ### Credits 34 | 35 | This project was inspired by the work of [Zverik](https://github.com/Zverik) and his [RevertUI](https://github.com/Zverik/RevertUI). 36 | 37 | ### Contact me 38 | 39 | 40 | -------------------------------------------------------------------------------- /icon/favicon_package_v0.16.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/icon/favicon_package_v0.16.zip -------------------------------------------------------------------------------- /icon/osm-revert.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/icon/osm-revert.gif -------------------------------------------------------------------------------- /icon/osm-revert.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/icon/osm-revert.xcf -------------------------------------------------------------------------------- /osm_revert/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from urllib.parse import urlsplit 3 | 4 | import sentry_sdk 5 | from githead import githead 6 | from pydantic import SecretStr 7 | from sentry_sdk.integrations.pure_eval import PureEvalIntegration 8 | 9 | VERSION = 'git#' + githead()[:7] 10 | WEBSITE = os.getenv('OSM_REVERT_WEBSITE') 11 | CREATED_BY = f'osm-revert {VERSION}' 12 | USER_AGENT = f'osm-revert/{VERSION} (+https://github.com/Zaczero/osm-revert)' 13 | 14 | TEST_ENV = os.getenv('TEST_ENV', '0').strip().lower() in ('1', 'true', 'yes') 15 | if TEST_ENV: 16 | print('[CONF] Running in test environment') # noqa: T201 17 | 18 | CHANGESETS_LIMIT_CONFIG = { 19 | '': { 20 | 0: 0, 21 | 10: 1, 22 | 100: 3, 23 | 500: 10, 24 | 3000: 30, 25 | }, 26 | 'moderator': {0: 50}, 27 | } 28 | 29 | TAG_MAX_LENGTH = 255 30 | TAG_PREFIX = 'revert' 31 | NO_TAG_PREFIX = {'comment', 'changesets_count', 'created_by', 'host', 'website'} 32 | 33 | REVERT_TO_DATE = os.getenv('REVERT_TO_DATE', None) 34 | CHANGESETS_LIMIT_MODERATOR_REVERT = int(os.getenv('CHANGESETS_LIMIT_MODERATOR_REVERT', 2000)) 35 | 36 | OSM_URL = os.getenv('OSM_URL', 'https://www.openstreetmap.org') 37 | OSM_API_URL = os.getenv('OSM_API_URL', 'https://api.openstreetmap.org') 38 | OVERPASS_URLS = os.getenv('OVERPASS_URLS', 'https://overpass-api.de/api').split() 39 | 40 | OSM_CLIENT = os.environ['OSM_CLIENT'] 41 | OSM_SECRET = SecretStr(os.environ['OSM_SECRET']) 42 | OSM_SCOPES = 'read_prefs write_api' 43 | CONNECTION_LIMIT = int(os.getenv('CONNECTION_LIMIT', 2)) 44 | 45 | if SENTRY_DSN := os.getenv('SENTRY_DSN'): 46 | sentry_sdk.init( 47 | dsn=SENTRY_DSN, 48 | release=VERSION, 49 | environment=urlsplit(OSM_URL).hostname, 50 | enable_tracing=True, 51 | traces_sample_rate=0.5, 52 | trace_propagation_targets=None, 53 | profiles_sample_rate=0.5, 54 | 
integrations=(PureEvalIntegration(),), 55 | _experiments={'continuous_profiling_auto_start': True}, 56 | ) 57 | -------------------------------------------------------------------------------- /osm_revert/context_logger.py: -------------------------------------------------------------------------------- 1 | from asyncio import Queue 2 | from contextlib import contextmanager 3 | from contextvars import ContextVar 4 | 5 | _log_queue: ContextVar[Queue[str]] = ContextVar('log_queue') 6 | 7 | 8 | @contextmanager 9 | def context_logger(): 10 | queue: Queue[str] = Queue() 11 | token = _log_queue.set(queue) 12 | try: 13 | yield queue 14 | finally: 15 | _log_queue.reset(token) 16 | queue.shutdown() 17 | 18 | 19 | def context_print(msg: str) -> None: 20 | queue = _log_queue.get() 21 | if queue is not None: 22 | queue.put_nowait(msg) 23 | else: 24 | print(msg) # noqa: T201 25 | -------------------------------------------------------------------------------- /osm_revert/diff_entry.py: -------------------------------------------------------------------------------- 1 | from typing import NamedTuple 2 | 3 | 4 | class DiffEntry(NamedTuple): 5 | timestamp: int 6 | element_id: str 7 | element_old: dict 8 | element_new: dict 9 | element_current: dict 10 | -------------------------------------------------------------------------------- /osm_revert/diff_match_patch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | """Diff Match and Patch 4 | Copyright 2018 The diff-match-patch Authors. 5 | https://github.com/google/diff-match-patch 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | """ 19 | 20 | """Functions for diff, match and patch. 21 | 22 | Computes the difference between two texts to create a patch. 23 | Applies the patch onto another text, allowing for errors. 24 | """ 25 | 26 | __author__ = 'fraser@google.com (Neil Fraser)' 27 | 28 | import re 29 | import sys 30 | import time 31 | 32 | 33 | class diff_match_patch: 34 | """Class containing the diff, match and patch methods. 35 | 36 | Also contains the behaviour settings. 37 | """ 38 | 39 | def __init__(self): 40 | """Inits a diff_match_patch object with default settings. 41 | Redefine these in your program to override the defaults. 42 | """ 43 | 44 | # Number of seconds to map a diff before giving up (0 for infinity). 45 | self.Diff_Timeout = 1.0 46 | # Cost of an empty edit operation in terms of edit characters. 47 | self.Diff_EditCost = 4 48 | # At what point is no match declared (0.0 = perfection, 1.0 = very loose). 49 | self.Match_Threshold = 0.5 50 | # How far to search for a match (0 = exact location, 1000+ = broad match). 51 | # A match this many characters away from the expected location will add 52 | # 1.0 to the score (0.0 is a perfect match). 53 | self.Match_Distance = 1000 54 | # When deleting a large block of text (over ~64 characters), how close do 55 | # the contents have to be to match the expected contents. (0.0 = perfection, 56 | # 1.0 = very loose). 
Note that Match_Threshold controls how closely the 57 | # end points of a delete need to match. 58 | self.Patch_DeleteThreshold = 0.5 59 | # Chunk size for context length. 60 | self.Patch_Margin = 4 61 | 62 | # The number of bits in an int. 63 | # Python has no maximum, thus to disable patch splitting set to 0. 64 | # However to avoid long patches in certain pathological cases, use 32. 65 | # Multiple short patches (using native ints) are much faster than long ones. 66 | self.Match_MaxBits = 32 67 | 68 | # DIFF FUNCTIONS 69 | 70 | # The data structure representing a diff is an array of tuples: 71 | # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")] 72 | # which means: delete "Hello", add "Goodbye" and keep " world." 73 | DIFF_DELETE = -1 74 | DIFF_INSERT = 1 75 | DIFF_EQUAL = 0 76 | 77 | def diff_main(self, text1, text2, checklines=True, deadline=None): 78 | """Find the differences between two texts. Simplifies the problem by 79 | stripping any common prefix or suffix off the texts before diffing. 80 | 81 | Args: 82 | text1: Old string to be diffed. 83 | text2: New string to be diffed. 84 | checklines: Optional speedup flag. If present and false, then don't run 85 | a line-level diff first to identify the changed areas. 86 | Defaults to true, which does a faster, slightly less optimal diff. 87 | deadline: Optional time when the diff should be complete by. Used 88 | internally for recursive calls. Users should set DiffTimeout instead. 89 | 90 | Returns: 91 | Array of changes. 92 | """ 93 | # Set a deadline by which time the diff must be complete. 94 | if deadline is None: 95 | # Unlike in most languages, Python counts time in seconds. 96 | deadline = sys.maxsize if self.Diff_Timeout <= 0 else time.time() + self.Diff_Timeout 97 | 98 | # Check for null inputs. 99 | if text1 is None or text2 is None: 100 | raise ValueError("Null inputs. (diff_main)") 101 | 102 | # Check for equality (speedup). 103 | if text1 == text2: 104 | if text1: 105 | return [(self.DIFF_EQUAL, text1)] 106 | return [] 107 | 108 | # Trim off common prefix (speedup). 109 | commonlength = self.diff_commonPrefix(text1, text2) 110 | commonprefix = text1[:commonlength] 111 | text1 = text1[commonlength:] 112 | text2 = text2[commonlength:] 113 | 114 | # Trim off common suffix (speedup). 115 | commonlength = self.diff_commonSuffix(text1, text2) 116 | if not commonlength: 117 | commonsuffix = '' 118 | else: 119 | commonsuffix = text1[-commonlength:] 120 | text1 = text1[:-commonlength] 121 | text2 = text2[:-commonlength] 122 | 123 | # Compute the diff on the middle block. 124 | diffs = self.diff_compute(text1, text2, checklines, deadline) 125 | 126 | # Restore the prefix and suffix. 127 | if commonprefix: 128 | diffs[:0] = [(self.DIFF_EQUAL, commonprefix)] 129 | if commonsuffix: 130 | diffs.append((self.DIFF_EQUAL, commonsuffix)) 131 | self.diff_cleanupMerge(diffs) 132 | return diffs 133 | 134 | def diff_compute(self, text1, text2, checklines, deadline): 135 | """Find the differences between two texts. Assumes that the texts do not 136 | have any common prefix or suffix. 137 | 138 | Args: 139 | text1: Old string to be diffed. 140 | text2: New string to be diffed. 141 | checklines: Speedup flag. If false, then don't run a line-level diff 142 | first to identify the changed areas. 143 | If true, then run a faster, slightly less optimal diff. 144 | deadline: Time when the diff should be complete by. 145 | 146 | Returns: 147 | Array of changes. 148 | """ 149 | if not text1: 150 | # Just add some text (speedup). 
151 | return [(self.DIFF_INSERT, text2)] 152 | 153 | if not text2: 154 | # Just delete some text (speedup). 155 | return [(self.DIFF_DELETE, text1)] 156 | 157 | if len(text1) > len(text2): 158 | (longtext, shorttext) = (text1, text2) 159 | else: 160 | (shorttext, longtext) = (text1, text2) 161 | i = longtext.find(shorttext) 162 | if i != -1: 163 | # Shorter text is inside the longer text (speedup). 164 | diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext), 165 | (self.DIFF_INSERT, longtext[i + len(shorttext):])] 166 | # Swap insertions for deletions if diff is reversed. 167 | if len(text1) > len(text2): 168 | diffs[0] = (self.DIFF_DELETE, diffs[0][1]) 169 | diffs[2] = (self.DIFF_DELETE, diffs[2][1]) 170 | return diffs 171 | 172 | if len(shorttext) == 1: 173 | # Single character string. 174 | # After the previous speedup, the character can't be an equality. 175 | return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] 176 | 177 | # Check to see if the problem can be split in two. 178 | hm = self.diff_halfMatch(text1, text2) 179 | if hm: 180 | # A half-match was found, sort out the return data. 181 | (text1_a, text1_b, text2_a, text2_b, mid_common) = hm 182 | # Send both pairs off for separate processing. 183 | diffs_a = self.diff_main(text1_a, text2_a, checklines, deadline) 184 | diffs_b = self.diff_main(text1_b, text2_b, checklines, deadline) 185 | # Merge the results. 186 | return [*diffs_a, (self.DIFF_EQUAL, mid_common), *diffs_b] 187 | 188 | if checklines and len(text1) > 100 and len(text2) > 100: 189 | return self.diff_lineMode(text1, text2, deadline) 190 | 191 | return self.diff_bisect(text1, text2, deadline) 192 | 193 | def diff_lineMode(self, text1, text2, deadline): 194 | """Do a quick line-level diff on both strings, then rediff the parts for 195 | greater accuracy. 196 | This speedup can produce non-minimal diffs. 197 | 198 | Args: 199 | text1: Old string to be diffed. 200 | text2: New string to be diffed. 201 | deadline: Time when the diff should be complete by. 202 | 203 | Returns: 204 | Array of changes. 205 | """ 206 | 207 | # Scan the text on a line-by-line basis first. 208 | (text1, text2, linearray) = self.diff_linesToChars(text1, text2) 209 | 210 | diffs = self.diff_main(text1, text2, False, deadline) 211 | 212 | # Convert the diff back to original text. 213 | self.diff_charsToLines(diffs, linearray) 214 | # Eliminate freak matches (e.g. blank lines) 215 | self.diff_cleanupSemantic(diffs) 216 | 217 | # Rediff any replacement blocks, this time character-by-character. 218 | # Add a dummy entry at the end. 219 | diffs.append((self.DIFF_EQUAL, '')) 220 | pointer = 0 221 | count_delete = 0 222 | count_insert = 0 223 | text_delete = '' 224 | text_insert = '' 225 | while pointer < len(diffs): 226 | if diffs[pointer][0] == self.DIFF_INSERT: 227 | count_insert += 1 228 | text_insert += diffs[pointer][1] 229 | elif diffs[pointer][0] == self.DIFF_DELETE: 230 | count_delete += 1 231 | text_delete += diffs[pointer][1] 232 | elif diffs[pointer][0] == self.DIFF_EQUAL: 233 | # Upon reaching an equality, check for prior redundancies. 234 | if count_delete >= 1 and count_insert >= 1: 235 | # Delete the offending records and add the merged ones. 
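# Line mode works on the one-character-per-line encoding produced by
# diff_linesToChars, e.g. (hypothetical mapping):
#   "alpha\nbeta\n"  -> "\x01\x02"
#   "alpha\ngamma\n" -> "\x01\x03"   with lineArray == ['', 'alpha\n', 'beta\n', 'gamma\n']
# The coarse diff over those characters is cheap; each delete/insert run
# collected above is now re-diffed character by character for an exact result.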
236 | subDiff = self.diff_main(text_delete, text_insert, False, deadline) 237 | diffs[pointer - count_delete - count_insert : pointer] = subDiff 238 | pointer = pointer - count_delete - count_insert + len(subDiff) 239 | count_insert = 0 240 | count_delete = 0 241 | text_delete = '' 242 | text_insert = '' 243 | 244 | pointer += 1 245 | 246 | diffs.pop() # Remove the dummy entry at the end. 247 | 248 | return diffs 249 | 250 | def diff_bisect(self, text1, text2, deadline): 251 | """Find the 'middle snake' of a diff, split the problem in two 252 | and return the recursively constructed diff. 253 | See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. 254 | 255 | Args: 256 | text1: Old string to be diffed. 257 | text2: New string to be diffed. 258 | deadline: Time at which to bail if not yet complete. 259 | 260 | Returns: 261 | Array of diff tuples. 262 | """ 263 | 264 | # Cache the text lengths to prevent multiple calls. 265 | text1_length = len(text1) 266 | text2_length = len(text2) 267 | max_d = (text1_length + text2_length + 1) // 2 268 | v_offset = max_d 269 | v_length = 2 * max_d 270 | v1 = [-1] * v_length 271 | v1[v_offset + 1] = 0 272 | v2 = v1[:] 273 | delta = text1_length - text2_length 274 | # If the total number of characters is odd, then the front path will 275 | # collide with the reverse path. 276 | front = bool(delta % 2) 277 | # Offsets for start and end of k loop. 278 | # Prevents mapping of space beyond the grid. 279 | k1start = 0 280 | k1end = 0 281 | k2start = 0 282 | k2end = 0 283 | for d in range(max_d): 284 | # Bail out if deadline is reached. 285 | if time.time() > deadline: 286 | break 287 | 288 | # Walk the front path one step. 289 | for k1 in range(-d + k1start, d + 1 - k1end, 2): 290 | k1_offset = v_offset + k1 291 | if k1 == -d or (k1 != d and 292 | v1[k1_offset - 1] < v1[k1_offset + 1]): 293 | x1 = v1[k1_offset + 1] 294 | else: 295 | x1 = v1[k1_offset - 1] + 1 296 | y1 = x1 - k1 297 | while (x1 < text1_length and y1 < text2_length and 298 | text1[x1] == text2[y1]): 299 | x1 += 1 300 | y1 += 1 301 | v1[k1_offset] = x1 302 | if x1 > text1_length: 303 | # Ran off the right of the graph. 304 | k1end += 2 305 | elif y1 > text2_length: 306 | # Ran off the bottom of the graph. 307 | k1start += 2 308 | elif front: 309 | k2_offset = v_offset + delta - k1 310 | if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] != -1: 311 | # Mirror x2 onto top-left coordinate system. 312 | x2 = text1_length - v2[k2_offset] 313 | if x1 >= x2: 314 | # Overlap detected. 315 | return self.diff_bisectSplit(text1, text2, x1, y1, deadline) 316 | 317 | # Walk the reverse path one step. 318 | for k2 in range(-d + k2start, d + 1 - k2end, 2): 319 | k2_offset = v_offset + k2 320 | if k2 == -d or (k2 != d and 321 | v2[k2_offset - 1] < v2[k2_offset + 1]): 322 | x2 = v2[k2_offset + 1] 323 | else: 324 | x2 = v2[k2_offset - 1] + 1 325 | y2 = x2 - k2 326 | while (x2 < text1_length and y2 < text2_length and 327 | text1[-x2 - 1] == text2[-y2 - 1]): 328 | x2 += 1 329 | y2 += 1 330 | v2[k2_offset] = x2 331 | if x2 > text1_length: 332 | # Ran off the left of the graph. 333 | k2end += 2 334 | elif y2 > text2_length: 335 | # Ran off the top of the graph. 336 | k2start += 2 337 | elif not front: 338 | k1_offset = v_offset + delta - k2 339 | if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] != -1: 340 | x1 = v1[k1_offset] 341 | y1 = v_offset + x1 - k1_offset 342 | # Mirror x2 onto top-left coordinate system. 
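# v1/v2 record, for each diagonal k, the furthest-reaching x coordinate of the
# forward and reverse walks (y follows from y = x - k).  Each iteration of d
# allows one more edit; the first time the two frontiers touch, that point is
# the 'middle snake' and the problem is split there by diff_bisectSplit,
# giving Myers' O(N*D) behaviour instead of a full edit-distance table.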
343 | x2 = text1_length - x2 344 | if x1 >= x2: 345 | # Overlap detected. 346 | return self.diff_bisectSplit(text1, text2, x1, y1, deadline) 347 | 348 | # Diff took too long and hit the deadline or 349 | # number of diffs equals number of characters, no commonality at all. 350 | return [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)] 351 | 352 | def diff_bisectSplit(self, text1, text2, x, y, deadline): 353 | """Given the location of the 'middle snake', split the diff in two parts 354 | and recurse. 355 | 356 | Args: 357 | text1: Old string to be diffed. 358 | text2: New string to be diffed. 359 | x: Index of split point in text1. 360 | y: Index of split point in text2. 361 | deadline: Time at which to bail if not yet complete. 362 | 363 | Returns: 364 | Array of diff tuples. 365 | """ 366 | text1a = text1[:x] 367 | text2a = text2[:y] 368 | text1b = text1[x:] 369 | text2b = text2[y:] 370 | 371 | # Compute both diffs serially. 372 | diffs = self.diff_main(text1a, text2a, False, deadline) 373 | diffsb = self.diff_main(text1b, text2b, False, deadline) 374 | 375 | return diffs + diffsb 376 | 377 | def diff_linesToChars(self, text1, text2, text3): 378 | """Split two texts into an array of strings. Reduce the texts to a string 379 | of hashes where each Unicode character represents one line. 380 | 381 | Args: 382 | text1: First string. 383 | text2: Second string. 384 | 385 | Returns: 386 | Three element tuple, containing the encoded text1, the encoded text2 and 387 | the array of unique strings. The zeroth element of the array of unique 388 | strings is intentionally blank. 389 | """ 390 | lineArray = [] # e.g. lineArray[4] == "Hello\n" 391 | lineHash = {} # e.g. lineHash["Hello\n"] == 4 392 | 393 | # "\x00" is a valid character, but various debuggers don't like it. 394 | # So we'll insert a junk entry to avoid generating a null character. 395 | lineArray.append('') 396 | 397 | def diff_linesToCharsMunge(text): 398 | """Split a text into an array of strings. Reduce the texts to a string 399 | of hashes where each Unicode character represents one line. 400 | Modifies linearray and linehash through being a closure. 401 | 402 | Args: 403 | text: String to encode. 404 | 405 | Returns: 406 | Encoded string. 407 | """ 408 | chars = [] 409 | # Walk the text, pulling out a substring for each line. 410 | # text.split('\n') would would temporarily double our memory footprint. 411 | # Modifying text would create many large strings to garbage collect. 412 | lineStart = 0 413 | lineEnd = -1 414 | while lineEnd < len(text) - 1: 415 | lineEnd = text.find('\n', lineStart) 416 | if lineEnd == -1: 417 | lineEnd = len(text) - 1 418 | line = text[lineStart:lineEnd + 1] 419 | 420 | if line in lineHash: 421 | chars.append(chr(lineHash[line])) 422 | else: 423 | if len(lineArray) == maxLines: 424 | # Bail out at 1114111 because chr(1114112) throws. 425 | line = text[lineStart:] 426 | lineEnd = len(text) 427 | lineArray.append(line) 428 | lineHash[line] = len(lineArray) - 1 429 | chars.append(chr(len(lineArray) - 1)) 430 | lineStart = lineEnd + 1 431 | return "".join(chars) 432 | 433 | # Allocate 2/3rds of the space for text1, the rest for text2. 
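# Note: this vendored copy diverges from upstream diff-match-patch here:
# diff_linesToChars accepts a third text and returns a four element tuple, so
# the old, new and current line sets can share one line<->char mapping (see
# dmp_utils.dmp).  The two-text allocation comment above is inherited from
# upstream.  Illustrative example, with dmp = diff_match_patch():
#   >>> dmp.diff_linesToChars('a\nb\n', 'a\nc\n', 'a\nb\n')
#   ('\x01\x02', '\x01\x03', '\x01\x02', ['', 'a\n', 'b\n', 'c\n'])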
434 | maxLines = 666666 435 | chars1 = diff_linesToCharsMunge(text1) 436 | maxLines = 557056 437 | chars2 = diff_linesToCharsMunge(text2) 438 | maxLines = 557055 439 | chars3 = diff_linesToCharsMunge(text3) 440 | return (chars1, chars2, chars3, lineArray) 441 | 442 | def diff_charsToLinesText(self, chars, lineArray): 443 | return "".join(lineArray[ord(char)] for char in chars) 444 | 445 | def diff_charsToLines(self, diffs, lineArray): 446 | """Rehydrate the text in a diff from a string of line hashes to real lines 447 | of text. 448 | 449 | Args: 450 | diffs: Array of diff tuples. 451 | lineArray: Array of unique strings. 452 | """ 453 | for i, (diff_type, diff_text) in enumerate(diffs): 454 | diffs[i] = (diff_type, "".join(lineArray[ord(char)] for char in diff_text)) 455 | 456 | def diff_commonPrefix(self, text1, text2): 457 | """Determine the common prefix of two strings. 458 | 459 | Args: 460 | text1: First string. 461 | text2: Second string. 462 | 463 | Returns: 464 | The number of characters common to the start of each string. 465 | """ 466 | # Quick check for common null cases. 467 | if not text1 or not text2 or text1[0] != text2[0]: 468 | return 0 469 | # Binary search. 470 | # Performance analysis: https://neil.fraser.name/news/2007/10/09/ 471 | pointermin = 0 472 | pointermax = min(len(text1), len(text2)) 473 | pointermid = pointermax 474 | pointerstart = 0 475 | while pointermin < pointermid: 476 | if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]: 477 | pointermin = pointermid 478 | pointerstart = pointermin 479 | else: 480 | pointermax = pointermid 481 | pointermid = (pointermax - pointermin) // 2 + pointermin 482 | return pointermid 483 | 484 | def diff_commonSuffix(self, text1, text2): 485 | """Determine the common suffix of two strings. 486 | 487 | Args: 488 | text1: First string. 489 | text2: Second string. 490 | 491 | Returns: 492 | The number of characters common to the end of each string. 493 | """ 494 | # Quick check for common null cases. 495 | if not text1 or not text2 or text1[-1] != text2[-1]: 496 | return 0 497 | # Binary search. 498 | # Performance analysis: https://neil.fraser.name/news/2007/10/09/ 499 | pointermin = 0 500 | pointermax = min(len(text1), len(text2)) 501 | pointermid = pointermax 502 | pointerend = 0 503 | while pointermin < pointermid: 504 | if (text1[-pointermid:len(text1) - pointerend] == 505 | text2[-pointermid:len(text2) - pointerend]): 506 | pointermin = pointermid 507 | pointerend = pointermin 508 | else: 509 | pointermax = pointermid 510 | pointermid = (pointermax - pointermin) // 2 + pointermin 511 | return pointermid 512 | 513 | def diff_commonOverlap(self, text1, text2): 514 | """Determine if the suffix of one string is the prefix of another. 515 | 516 | Args: 517 | text1 First string. 518 | text2 Second string. 519 | 520 | Returns: 521 | The number of characters common to the end of the first 522 | string and the start of the second string. 523 | """ 524 | # Cache the text lengths to prevent multiple calls. 525 | text1_length = len(text1) 526 | text2_length = len(text2) 527 | # Eliminate the null case. 528 | if not text1_length or not text2_length: 529 | return 0 530 | # Truncate the longer string. 531 | if text1_length > text2_length: 532 | text1 = text1[-text2_length:] 533 | elif text1_length < text2_length: 534 | text2 = text2[:text1_length] 535 | text_length = min(text1_length, text2_length) 536 | # Quick check for the worst case. 
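# Doctest-style examples for these three helpers (illustrative only, with
# dmp = diff_match_patch()):
#   >>> dmp.diff_commonPrefix('1234abcdef', '1234xyz')
#   4
#   >>> dmp.diff_commonSuffix('abcdef1234', 'xyz1234')
#   4
#   >>> dmp.diff_commonOverlap('123456xxx', 'xxxabcd')
#   3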
537 | if text1 == text2: 538 | return text_length 539 | 540 | # Start by looking for a single character match 541 | # and increase length until no match is found. 542 | # Performance analysis: https://neil.fraser.name/news/2010/11/04/ 543 | best = 0 544 | length = 1 545 | while True: 546 | pattern = text1[-length:] 547 | found = text2.find(pattern) 548 | if found == -1: 549 | return best 550 | length += found 551 | if not found or text1[-length:] == text2[:length]: 552 | best = length 553 | length += 1 554 | 555 | def diff_halfMatch(self, text1, text2): 556 | """Do the two texts share a substring which is at least half the length of 557 | the longer text? 558 | This speedup can produce non-minimal diffs. 559 | 560 | Args: 561 | text1: First string. 562 | text2: Second string. 563 | 564 | Returns: 565 | Five element Array, containing the prefix of text1, the suffix of text1, 566 | the prefix of text2, the suffix of text2 and the common middle. Or None 567 | if there was no match. 568 | """ 569 | if self.Diff_Timeout <= 0: 570 | # Don't risk returning a non-optimal diff if we have unlimited time. 571 | return None 572 | if len(text1) > len(text2): 573 | (longtext, shorttext) = (text1, text2) 574 | else: 575 | (shorttext, longtext) = (text1, text2) 576 | if len(longtext) < 4 or len(shorttext) * 2 < len(longtext): 577 | return None # Pointless. 578 | 579 | def diff_halfMatchI(longtext, shorttext, i): 580 | """Does a substring of shorttext exist within longtext such that the 581 | substring is at least half the length of longtext? 582 | Closure, but does not reference any external variables. 583 | 584 | Args: 585 | longtext: Longer string. 586 | shorttext: Shorter string. 587 | i: Start index of quarter length substring within longtext. 588 | 589 | Returns: 590 | Five element Array, containing the prefix of longtext, the suffix of 591 | longtext, the prefix of shorttext, the suffix of shorttext and the 592 | common middle. Or None if there was no match. 593 | """ 594 | seed = longtext[i:i + len(longtext) // 4] 595 | best_common = '' 596 | j = shorttext.find(seed) 597 | while j != -1: 598 | prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:]) 599 | suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j]) 600 | if len(best_common) < suffixLength + prefixLength: 601 | best_common = (shorttext[j - suffixLength:j] + 602 | shorttext[j:j + prefixLength]) 603 | best_longtext_a = longtext[:i - suffixLength] 604 | best_longtext_b = longtext[i + prefixLength:] 605 | best_shorttext_a = shorttext[:j - suffixLength] 606 | best_shorttext_b = shorttext[j + prefixLength:] 607 | j = shorttext.find(seed, j + 1) 608 | 609 | if len(best_common) * 2 >= len(longtext): 610 | return (best_longtext_a, best_longtext_b, 611 | best_shorttext_a, best_shorttext_b, best_common) 612 | else: 613 | return None 614 | 615 | # First check if the second quarter is the seed for a half-match. 616 | hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) // 4) 617 | # Check again based on the third quarter. 618 | hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) // 2) 619 | if not hm1 and not hm2: 620 | return None 621 | elif not hm2: 622 | hm = hm1 623 | elif not hm1: 624 | hm = hm2 625 | else: 626 | # Both matched. Select the longest. 627 | hm = hm1 if len(hm1[4]) > len(hm2[4]) else hm2 628 | 629 | # A half-match was found, sort out the return data. 
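# With the default Diff_Timeout, an illustrative example of the five element
# result (prefix1, suffix1, prefix2, suffix2, common middle), assuming
# dmp = diff_match_patch():
#   >>> dmp.diff_halfMatch('1234567890', 'a345678z')
#   ('12', '90', 'a', 'z', '345678')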
630 | if len(text1) > len(text2): 631 | (text1_a, text1_b, text2_a, text2_b, mid_common) = hm 632 | else: 633 | (text2_a, text2_b, text1_a, text1_b, mid_common) = hm 634 | return (text1_a, text1_b, text2_a, text2_b, mid_common) 635 | 636 | def diff_cleanupSemantic(self, diffs): 637 | """Reduce the number of edits by eliminating semantically trivial 638 | equalities. 639 | 640 | Args: 641 | diffs: Array of diff tuples. 642 | """ 643 | changes = False 644 | equalities = [] # Stack of indices where equalities are found. 645 | lastEquality = None # Always equal to diffs[equalities[-1]][1] 646 | pointer = 0 # Index of current position. 647 | # Number of chars that changed prior to the equality. 648 | length_insertions1, length_deletions1 = 0, 0 649 | # Number of chars that changed after the equality. 650 | length_insertions2, length_deletions2 = 0, 0 651 | while pointer < len(diffs): 652 | if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 653 | equalities.append(pointer) 654 | length_insertions1, length_insertions2 = length_insertions2, 0 655 | length_deletions1, length_deletions2 = length_deletions2, 0 656 | lastEquality = diffs[pointer][1] 657 | else: # An insertion or deletion. 658 | if diffs[pointer][0] == self.DIFF_INSERT: 659 | length_insertions2 += len(diffs[pointer][1]) 660 | else: 661 | length_deletions2 += len(diffs[pointer][1]) 662 | # Eliminate an equality that is smaller or equal to the edits on both 663 | # sides of it. 664 | if (lastEquality and (len(lastEquality) <= 665 | max(length_insertions1, length_deletions1)) and 666 | (len(lastEquality) <= max(length_insertions2, length_deletions2))): 667 | # Duplicate record. 668 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) 669 | # Change second copy to insert. 670 | diffs[equalities[-1] + 1] = (self.DIFF_INSERT, 671 | diffs[equalities[-1] + 1][1]) 672 | # Throw away the equality we just deleted. 673 | equalities.pop() 674 | # Throw away the previous equality (it needs to be reevaluated). 675 | if equalities: 676 | equalities.pop() 677 | pointer = equalities[-1] if equalities else -1 678 | # Reset the counters. 679 | length_insertions1, length_deletions1 = 0, 0 680 | length_insertions2, length_deletions2 = 0, 0 681 | lastEquality = None 682 | changes = True 683 | pointer += 1 684 | 685 | # Normalize the diff. 686 | if changes: 687 | self.diff_cleanupMerge(diffs) 688 | self.diff_cleanupSemanticLossless(diffs) 689 | 690 | # Find any overlaps between deletions and insertions. 691 | # e.g: abcxxxxxxdef 692 | # -> abcxxxdef 693 | # e.g: xxxabcdefxxx 694 | # -> defxxxabc 695 | # Only extract an overlap if it is as big as the edit ahead or behind it. 696 | pointer = 1 697 | while pointer < len(diffs): 698 | if (diffs[pointer - 1][0] == self.DIFF_DELETE and 699 | diffs[pointer][0] == self.DIFF_INSERT): 700 | deletion = diffs[pointer - 1][1] 701 | insertion = diffs[pointer][1] 702 | overlap_length1 = self.diff_commonOverlap(deletion, insertion) 703 | overlap_length2 = self.diff_commonOverlap(insertion, deletion) 704 | if overlap_length1 >= overlap_length2: 705 | if (overlap_length1 >= len(deletion) / 2.0 or 706 | overlap_length1 >= len(insertion) / 2.0): 707 | # Overlap found. Insert an equality and trim the surrounding edits. 
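# Example of the overlap extraction performed below (illustrative):
#   [(-1, 'abcxxx'), (1, 'xxxdef')] -> [(-1, 'abc'), (0, 'xxx'), (1, 'def')]
#   [(-1, 'xxxabc'), (1, 'defxxx')] -> [(1, 'def'), (0, 'xxx'), (-1, 'abc')]
# i.e. the deleted suffix / inserted prefix 'xxx' becomes an equality.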
708 | diffs.insert(pointer, (self.DIFF_EQUAL, 709 | insertion[:overlap_length1])) 710 | diffs[pointer - 1] = (self.DIFF_DELETE, 711 | deletion[:len(deletion) - overlap_length1]) 712 | diffs[pointer + 1] = (self.DIFF_INSERT, 713 | insertion[overlap_length1:]) 714 | pointer += 1 715 | else: 716 | if (overlap_length2 >= len(deletion) / 2.0 or 717 | overlap_length2 >= len(insertion) / 2.0): 718 | # Reverse overlap found. 719 | # Insert an equality and swap and trim the surrounding edits. 720 | diffs.insert(pointer, (self.DIFF_EQUAL, deletion[:overlap_length2])) 721 | diffs[pointer - 1] = (self.DIFF_INSERT, 722 | insertion[:len(insertion) - overlap_length2]) 723 | diffs[pointer + 1] = (self.DIFF_DELETE, deletion[overlap_length2:]) 724 | pointer += 1 725 | pointer += 1 726 | pointer += 1 727 | 728 | def diff_cleanupSemanticLossless(self, diffs): 729 | """Look for single edits surrounded on both sides by equalities 730 | which can be shifted sideways to align the edit to a word boundary. 731 | e.g: The cat came. -> The cat came. 732 | 733 | Args: 734 | diffs: Array of diff tuples. 735 | """ 736 | 737 | def diff_cleanupSemanticScore(one, two): 738 | """Given two strings, compute a score representing whether the 739 | internal boundary falls on logical boundaries. 740 | Scores range from 6 (best) to 0 (worst). 741 | Closure, but does not reference any external variables. 742 | 743 | Args: 744 | one: First string. 745 | two: Second string. 746 | 747 | Returns: 748 | The score. 749 | """ 750 | if not one or not two: 751 | # Edges are the best. 752 | return 6 753 | 754 | # Each port of this function behaves slightly differently due to 755 | # subtle differences in each language's definition of things like 756 | # 'whitespace'. Since this function's purpose is largely cosmetic, 757 | # the choice has been made to use each language's native features 758 | # rather than force total conformity. 759 | char1 = one[-1] 760 | char2 = two[0] 761 | nonAlphaNumeric1 = not char1.isalnum() 762 | nonAlphaNumeric2 = not char2.isalnum() 763 | whitespace1 = nonAlphaNumeric1 and char1.isspace() 764 | whitespace2 = nonAlphaNumeric2 and char2.isspace() 765 | lineBreak1 = whitespace1 and (char1 == "\r" or char1 == "\n") 766 | lineBreak2 = whitespace2 and (char2 == "\r" or char2 == "\n") 767 | blankLine1 = lineBreak1 and self.BLANKLINEEND.search(one) 768 | blankLine2 = lineBreak2 and self.BLANKLINESTART.match(two) 769 | 770 | if blankLine1 or blankLine2: 771 | # Five points for blank lines. 772 | return 5 773 | elif lineBreak1 or lineBreak2: 774 | # Four points for line breaks. 775 | return 4 776 | elif nonAlphaNumeric1 and not whitespace1 and whitespace2: 777 | # Three points for end of sentences. 778 | return 3 779 | elif whitespace1 or whitespace2: 780 | # Two points for whitespace. 781 | return 2 782 | elif nonAlphaNumeric1 or nonAlphaNumeric2: 783 | # One point for non-alphanumeric. 784 | return 1 785 | return 0 786 | 787 | pointer = 1 788 | # Intentionally ignore the first and last element (don't need checking). 789 | while pointer < len(diffs) - 1: 790 | if (diffs[pointer - 1][0] == self.DIFF_EQUAL and 791 | diffs[pointer + 1][0] == self.DIFF_EQUAL): 792 | # This is a single edit surrounded by equalities. 793 | equality1 = diffs[pointer - 1][1] 794 | edit = diffs[pointer][1] 795 | equality2 = diffs[pointer + 1][1] 796 | 797 | # First, shift the edit as far left as possible. 
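# Example of the word-boundary alignment performed here (illustrative):
#   [(0, 'The c'), (1, 'ow and the c'), (0, 'at.')]
#       -> [(0, 'The '), (1, 'cow and the '), (0, 'cat.')]
# The edit keeps its length; it is only slid sideways so that its boundaries
# fall on whitespace rather than in the middle of a word.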
798 | commonOffset = self.diff_commonSuffix(equality1, edit) 799 | if commonOffset: 800 | commonString = edit[-commonOffset:] 801 | equality1 = equality1[:-commonOffset] 802 | edit = commonString + edit[:-commonOffset] 803 | equality2 = commonString + equality2 804 | 805 | # Second, step character by character right, looking for the best fit. 806 | bestEquality1 = equality1 807 | bestEdit = edit 808 | bestEquality2 = equality2 809 | bestScore = (diff_cleanupSemanticScore(equality1, edit) + 810 | diff_cleanupSemanticScore(edit, equality2)) 811 | while edit and equality2 and edit[0] == equality2[0]: 812 | equality1 += edit[0] 813 | edit = edit[1:] + equality2[0] 814 | equality2 = equality2[1:] 815 | score = (diff_cleanupSemanticScore(equality1, edit) + 816 | diff_cleanupSemanticScore(edit, equality2)) 817 | # The >= encourages trailing rather than leading whitespace on edits. 818 | if score >= bestScore: 819 | bestScore = score 820 | bestEquality1 = equality1 821 | bestEdit = edit 822 | bestEquality2 = equality2 823 | 824 | if diffs[pointer - 1][1] != bestEquality1: 825 | # We have an improvement, save it back to the diff. 826 | if bestEquality1: 827 | diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1) 828 | else: 829 | del diffs[pointer - 1] 830 | pointer -= 1 831 | diffs[pointer] = (diffs[pointer][0], bestEdit) 832 | if bestEquality2: 833 | diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2) 834 | else: 835 | del diffs[pointer + 1] 836 | pointer -= 1 837 | pointer += 1 838 | 839 | # Define some regex patterns for matching boundaries. 840 | BLANKLINEEND = re.compile(r"\n\r?\n$") 841 | BLANKLINESTART = re.compile(r"^\r?\n\r?\n") 842 | 843 | def diff_cleanupEfficiency(self, diffs): 844 | """Reduce the number of edits by eliminating operationally trivial 845 | equalities. 846 | 847 | Args: 848 | diffs: Array of diff tuples. 849 | """ 850 | changes = False 851 | equalities = [] # Stack of indices where equalities are found. 852 | lastEquality = None # Always equal to diffs[equalities[-1]][1] 853 | pointer = 0 # Index of current position. 854 | pre_ins = False # Is there an insertion operation before the last equality. 855 | pre_del = False # Is there a deletion operation before the last equality. 856 | post_ins = False # Is there an insertion operation after the last equality. 857 | post_del = False # Is there a deletion operation after the last equality. 858 | while pointer < len(diffs): 859 | if diffs[pointer][0] == self.DIFF_EQUAL: # Equality found. 860 | if (len(diffs[pointer][1]) < self.Diff_EditCost and 861 | (post_ins or post_del)): 862 | # Candidate found. 863 | equalities.append(pointer) 864 | pre_ins = post_ins 865 | pre_del = post_del 866 | lastEquality = diffs[pointer][1] 867 | else: 868 | # Not a candidate, and can never become one. 869 | equalities = [] 870 | lastEquality = None 871 | 872 | post_ins = post_del = False 873 | else: # An insertion or deletion. 874 | if diffs[pointer][0] == self.DIFF_DELETE: 875 | post_del = True 876 | else: 877 | post_ins = True 878 | 879 | # Five types to be split: 880 | # ABXYCD 881 | # AXCD 882 | # ABXC 883 | # AXCD 884 | # ABXC 885 | 886 | if lastEquality and ((pre_ins and pre_del and post_ins and post_del) or 887 | ((len(lastEquality) < self.Diff_EditCost / 2) and 888 | (pre_ins + pre_del + post_ins + post_del) == 3)): 889 | # Duplicate record. 890 | diffs.insert(equalities[-1], (self.DIFF_DELETE, lastEquality)) 891 | # Change second copy to insert. 
892 | diffs[equalities[-1] + 1] = (self.DIFF_INSERT, 893 | diffs[equalities[-1] + 1][1]) 894 | equalities.pop() # Throw away the equality we just deleted. 895 | lastEquality = None 896 | if pre_ins and pre_del: 897 | # No changes made which could affect previous entry, keep going. 898 | post_ins = post_del = True 899 | equalities = [] 900 | else: 901 | if equalities: 902 | equalities.pop() # Throw away the previous equality. 903 | pointer = equalities[-1] if equalities else -1 904 | post_ins = post_del = False 905 | changes = True 906 | pointer += 1 907 | 908 | if changes: 909 | self.diff_cleanupMerge(diffs) 910 | 911 | def diff_cleanupMerge(self, diffs): 912 | """Reorder and merge like edit sections. Merge equalities. 913 | Any edit section can move as long as it doesn't cross an equality. 914 | 915 | Args: 916 | diffs: Array of diff tuples. 917 | """ 918 | diffs.append((self.DIFF_EQUAL, '')) # Add a dummy entry at the end. 919 | pointer = 0 920 | count_delete = 0 921 | count_insert = 0 922 | text_delete = '' 923 | text_insert = '' 924 | while pointer < len(diffs): 925 | if diffs[pointer][0] == self.DIFF_INSERT: 926 | count_insert += 1 927 | text_insert += diffs[pointer][1] 928 | pointer += 1 929 | elif diffs[pointer][0] == self.DIFF_DELETE: 930 | count_delete += 1 931 | text_delete += diffs[pointer][1] 932 | pointer += 1 933 | elif diffs[pointer][0] == self.DIFF_EQUAL: 934 | # Upon reaching an equality, check for prior redundancies. 935 | if count_delete + count_insert > 1: 936 | if count_delete and count_insert: 937 | # Factor out any common prefixies. 938 | commonlength = self.diff_commonPrefix(text_insert, text_delete) 939 | if commonlength: 940 | x = pointer - count_delete - count_insert - 1 941 | if x >= 0 and diffs[x][0] == self.DIFF_EQUAL: 942 | diffs[x] = (diffs[x][0], diffs[x][1] + 943 | text_insert[:commonlength]) 944 | else: 945 | diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength])) 946 | pointer += 1 947 | text_insert = text_insert[commonlength:] 948 | text_delete = text_delete[commonlength:] 949 | # Factor out any common suffixies. 950 | commonlength = self.diff_commonSuffix(text_insert, text_delete) 951 | if commonlength: 952 | diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] + 953 | diffs[pointer][1]) 954 | text_insert = text_insert[:-commonlength] 955 | text_delete = text_delete[:-commonlength] 956 | # Delete the offending records and add the merged ones. 957 | new_ops = [] 958 | if text_delete: 959 | new_ops.append((self.DIFF_DELETE, text_delete)) 960 | if text_insert: 961 | new_ops.append((self.DIFF_INSERT, text_insert)) 962 | pointer -= count_delete + count_insert 963 | diffs[pointer : pointer + count_delete + count_insert] = new_ops 964 | pointer += len(new_ops) + 1 965 | elif pointer and diffs[pointer - 1][0] == self.DIFF_EQUAL: 966 | # Merge this equality with the previous one. 967 | diffs[pointer - 1] = (diffs[pointer - 1][0], 968 | diffs[pointer - 1][1] + diffs[pointer][1]) 969 | del diffs[pointer] 970 | else: 971 | pointer += 1 972 | 973 | count_insert = 0 974 | count_delete = 0 975 | text_delete = '' 976 | text_insert = '' 977 | 978 | if diffs[-1][1] == '': 979 | diffs.pop() # Remove the dummy entry at the end. 980 | 981 | # Second pass: look for single edits surrounded on both sides by equalities 982 | # which can be shifted sideways to eliminate an equality. 983 | # e.g: ABAC -> ABAC 984 | changes = False 985 | pointer = 1 986 | # Intentionally ignore the first and last element (don't need checking). 
987 | while pointer < len(diffs) - 1: 988 | if (diffs[pointer - 1][0] == self.DIFF_EQUAL and 989 | diffs[pointer + 1][0] == self.DIFF_EQUAL): 990 | # This is a single edit surrounded by equalities. 991 | if diffs[pointer][1].endswith(diffs[pointer - 1][1]): 992 | # Shift the edit over the previous equality. 993 | if diffs[pointer - 1][1] != "": 994 | diffs[pointer] = (diffs[pointer][0], 995 | diffs[pointer - 1][1] + 996 | diffs[pointer][1][:-len(diffs[pointer - 1][1])]) 997 | diffs[pointer + 1] = (diffs[pointer + 1][0], 998 | diffs[pointer - 1][1] + diffs[pointer + 1][1]) 999 | del diffs[pointer - 1] 1000 | changes = True 1001 | elif diffs[pointer][1].startswith(diffs[pointer + 1][1]): 1002 | # Shift the edit over the next equality. 1003 | diffs[pointer - 1] = (diffs[pointer - 1][0], 1004 | diffs[pointer - 1][1] + diffs[pointer + 1][1]) 1005 | diffs[pointer] = (diffs[pointer][0], 1006 | diffs[pointer][1][len(diffs[pointer + 1][1]):] + 1007 | diffs[pointer + 1][1]) 1008 | del diffs[pointer + 1] 1009 | changes = True 1010 | pointer += 1 1011 | 1012 | # If shifts were made, the diff needs reordering and another shift sweep. 1013 | if changes: 1014 | self.diff_cleanupMerge(diffs) 1015 | 1016 | def diff_xIndex(self, diffs, loc): 1017 | """loc is a location in text1, compute and return the equivalent location 1018 | in text2. e.g. "The cat" vs "The big cat", 1->1, 5->8 1019 | 1020 | Args: 1021 | diffs: Array of diff tuples. 1022 | loc: Location within text1. 1023 | 1024 | Returns: 1025 | Location within text2. 1026 | """ 1027 | chars1 = 0 1028 | chars2 = 0 1029 | last_chars1 = 0 1030 | last_chars2 = 0 1031 | for x in range(len(diffs)): 1032 | (op, text) = diffs[x] 1033 | if op != self.DIFF_INSERT: # Equality or deletion. 1034 | chars1 += len(text) 1035 | if op != self.DIFF_DELETE: # Equality or insertion. 1036 | chars2 += len(text) 1037 | if chars1 > loc: # Overshot the location. 1038 | break 1039 | last_chars1 = chars1 1040 | last_chars2 = chars2 1041 | 1042 | if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE: 1043 | # The location was deleted. 1044 | return last_chars2 1045 | # Add the remaining len(character). 1046 | return last_chars2 + (loc - last_chars1) 1047 | 1048 | def diff_text1(self, diffs): 1049 | """Compute and return the source text (all equalities and deletions). 1050 | 1051 | Args: 1052 | diffs: Array of diff tuples. 1053 | 1054 | Returns: 1055 | Source text. 1056 | """ 1057 | text = [] 1058 | for (op, data) in diffs: 1059 | if op != self.DIFF_INSERT: 1060 | text.append(data) 1061 | return "".join(text) 1062 | 1063 | def diff_text2(self, diffs): 1064 | """Compute and return the destination text (all equalities and insertions). 1065 | 1066 | Args: 1067 | diffs: Array of diff tuples. 1068 | 1069 | Returns: 1070 | Destination text. 1071 | """ 1072 | text = [] 1073 | for (op, data) in diffs: 1074 | if op != self.DIFF_DELETE: 1075 | text.append(data) 1076 | return "".join(text) 1077 | 1078 | def diff_levenshtein(self, diffs): 1079 | """Compute the Levenshtein distance; the number of inserted, deleted or 1080 | substituted characters. 1081 | 1082 | Args: 1083 | diffs: Array of diff tuples. 1084 | 1085 | Returns: 1086 | Number of changes. 1087 | """ 1088 | levenshtein = 0 1089 | insertions = 0 1090 | deletions = 0 1091 | for (op, data) in diffs: 1092 | if op == self.DIFF_INSERT: 1093 | insertions += len(data) 1094 | elif op == self.DIFF_DELETE: 1095 | deletions += len(data) 1096 | elif op == self.DIFF_EQUAL: 1097 | # A deletion and an insertion is one substitution. 
1098 | levenshtein += max(insertions, deletions) 1099 | insertions = 0 1100 | deletions = 0 1101 | levenshtein += max(insertions, deletions) 1102 | return levenshtein 1103 | 1104 | # MATCH FUNCTIONS 1105 | 1106 | def match_main(self, text, pattern, loc): 1107 | """Locate the best instance of 'pattern' in 'text' near 'loc'. 1108 | 1109 | Args: 1110 | text: The text to search. 1111 | pattern: The pattern to search for. 1112 | loc: The location to search around. 1113 | 1114 | Returns: 1115 | Best match index or -1. 1116 | """ 1117 | # Check for null inputs. 1118 | if text is None or pattern is None: 1119 | raise ValueError("Null inputs. (match_main)") 1120 | 1121 | loc = max(0, min(loc, len(text))) 1122 | if text == pattern: 1123 | # Shortcut (potentially not guaranteed by the algorithm) 1124 | return 0 1125 | elif not text: 1126 | # Nothing to match. 1127 | return -1 1128 | elif text[loc:loc + len(pattern)] == pattern: 1129 | # Perfect match at the perfect spot! (Includes case of null pattern) 1130 | return loc 1131 | else: 1132 | # Do a fuzzy compare. 1133 | match = self.match_bitap(text, pattern, loc) 1134 | return match 1135 | 1136 | def match_bitap(self, text, pattern, loc): 1137 | """Locate the best instance of 'pattern' in 'text' near 'loc' using the 1138 | Bitap algorithm. 1139 | 1140 | Args: 1141 | text: The text to search. 1142 | pattern: The pattern to search for. 1143 | loc: The location to search around. 1144 | 1145 | Returns: 1146 | Best match index or -1. 1147 | """ 1148 | # Python doesn't have a maxint limit, so ignore this check. 1149 | #if self.Match_MaxBits and len(pattern) > self.Match_MaxBits: 1150 | # raise ValueError("Pattern too long for this application.") 1151 | 1152 | # Initialise the alphabet. 1153 | s = self.match_alphabet(pattern) 1154 | 1155 | def match_bitapScore(e, x): 1156 | """Compute and return the score for a match with e errors and x location. 1157 | Accesses loc and pattern through being a closure. 1158 | 1159 | Args: 1160 | e: Number of errors in match. 1161 | x: Location of match. 1162 | 1163 | Returns: 1164 | Overall score for match (0.0 = good, 1.0 = bad). 1165 | """ 1166 | accuracy = float(e) / len(pattern) 1167 | proximity = abs(loc - x) 1168 | if not self.Match_Distance: 1169 | # Dodge divide by zero error. 1170 | return proximity and 1.0 or accuracy 1171 | return accuracy + (proximity / float(self.Match_Distance)) 1172 | 1173 | # Highest score beyond which we give up. 1174 | score_threshold = self.Match_Threshold 1175 | # Is there a nearby exact match? (speedup) 1176 | best_loc = text.find(pattern, loc) 1177 | if best_loc != -1: 1178 | score_threshold = min(match_bitapScore(0, best_loc), score_threshold) 1179 | # What about in the other direction? (speedup) 1180 | best_loc = text.rfind(pattern, loc + len(pattern)) 1181 | if best_loc != -1: 1182 | score_threshold = min(match_bitapScore(0, best_loc), score_threshold) 1183 | 1184 | # Initialise the bit arrays. 1185 | matchmask = 1 << (len(pattern) - 1) 1186 | best_loc = -1 1187 | 1188 | bin_max = len(pattern) + len(text) 1189 | # Empty initialization added to appease pychecker. 1190 | last_rd = None 1191 | for d in range(len(pattern)): 1192 | # Scan for the best match each iteration allows for one more error. 1193 | # Run a binary search to determine how far from 'loc' we can stray at 1194 | # this error level. 
1195 | bin_min = 0 1196 | bin_mid = bin_max 1197 | while bin_min < bin_mid: 1198 | if match_bitapScore(d, loc + bin_mid) <= score_threshold: 1199 | bin_min = bin_mid 1200 | else: 1201 | bin_max = bin_mid 1202 | bin_mid = (bin_max - bin_min) // 2 + bin_min 1203 | 1204 | # Use the result from this iteration as the maximum for the next. 1205 | bin_max = bin_mid 1206 | start = max(1, loc - bin_mid + 1) 1207 | finish = min(loc + bin_mid, len(text)) + len(pattern) 1208 | 1209 | rd = [0] * (finish + 2) 1210 | rd[finish + 1] = (1 << d) - 1 1211 | for j in range(finish, start - 1, -1): 1212 | charMatch = 0 if len(text) <= j - 1 else s.get(text[j - 1], 0) 1213 | if not d: # First pass: exact match. 1214 | rd[j] = ((rd[j + 1] << 1) | 1) & charMatch 1215 | else: # Subsequent passes: fuzzy match. 1216 | rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) | ( 1217 | ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1] 1218 | if rd[j] & matchmask: 1219 | score = match_bitapScore(d, j - 1) 1220 | # This match will almost certainly be better than any existing match. 1221 | # But check anyway. 1222 | if score <= score_threshold: 1223 | # Told you so. 1224 | score_threshold = score 1225 | best_loc = j - 1 1226 | if best_loc > loc: 1227 | # When passing loc, don't exceed our current distance from loc. 1228 | start = max(1, 2 * loc - best_loc) 1229 | else: 1230 | # Already passed loc, downhill from here on in. 1231 | break 1232 | # No hope for a (better) match at greater error levels. 1233 | if match_bitapScore(d + 1, loc) > score_threshold: 1234 | break 1235 | last_rd = rd 1236 | return best_loc 1237 | 1238 | def match_alphabet(self, pattern): 1239 | """Initialise the alphabet for the Bitap algorithm. 1240 | 1241 | Args: 1242 | pattern: The text to encode. 1243 | 1244 | Returns: 1245 | Hash of character locations. 1246 | """ 1247 | s = {} 1248 | for char in pattern: 1249 | s[char] = 0 1250 | for i in range(len(pattern)): 1251 | s[pattern[i]] |= 1 << (len(pattern) - i - 1) 1252 | return s 1253 | 1254 | # PATCH FUNCTIONS 1255 | 1256 | def patch_addContext(self, patch, text): 1257 | """Increase the context until it is unique, 1258 | but don't let the pattern expand beyond Match_MaxBits. 1259 | 1260 | Args: 1261 | patch: The patch to grow. 1262 | text: Source text. 1263 | """ 1264 | if not text: 1265 | return 1266 | pattern = text[patch.start2 : patch.start2 + patch.length1] 1267 | padding = 0 1268 | 1269 | # Look for the first and last matches of pattern in text. If two different 1270 | # matches are found, increase the pattern length. 1271 | while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits == 1272 | 0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin - 1273 | self.Patch_Margin)): 1274 | padding += self.Patch_Margin 1275 | pattern = text[max(0, patch.start2 - padding) : 1276 | patch.start2 + patch.length1 + padding] 1277 | # Add one chunk for good luck. 1278 | padding += self.Patch_Margin 1279 | 1280 | # Add the prefix. 1281 | prefix = text[max(0, patch.start2 - padding) : patch.start2] 1282 | if prefix: 1283 | patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)] 1284 | # Add the suffix. 1285 | suffix = text[patch.start2 + patch.length1 : 1286 | patch.start2 + patch.length1 + padding] 1287 | if suffix: 1288 | patch.diffs.append((self.DIFF_EQUAL, suffix)) 1289 | 1290 | # Roll back the start points. 1291 | patch.start1 -= len(prefix) 1292 | patch.start2 -= len(prefix) 1293 | # Extend lengths. 
1294 | patch.length1 += len(prefix) + len(suffix) 1295 | patch.length2 += len(prefix) + len(suffix) 1296 | 1297 | def patch_make(self, a, b=None, c=None): 1298 | """Compute a list of patches to turn text1 into text2. 1299 | Use diffs if provided, otherwise compute it ourselves. 1300 | There are four ways to call this function, depending on what data is 1301 | available to the caller: 1302 | Method 1: 1303 | a = text1, b = text2 1304 | Method 2: 1305 | a = diffs 1306 | Method 3 (optimal): 1307 | a = text1, b = diffs 1308 | Method 4 (deprecated, use method 3): 1309 | a = text1, b = text2, c = diffs 1310 | 1311 | Args: 1312 | a: text1 (methods 1,3,4) or Array of diff tuples for text1 to 1313 | text2 (method 2). 1314 | b: text2 (methods 1,4) or Array of diff tuples for text1 to 1315 | text2 (method 3) or undefined (method 2). 1316 | c: Array of diff tuples for text1 to text2 (method 4) or 1317 | undefined (methods 1,2,3). 1318 | 1319 | Returns: 1320 | Array of Patch objects. 1321 | """ 1322 | text1 = None 1323 | diffs = None 1324 | if isinstance(a, str) and isinstance(b, str) and c is None: 1325 | # Method 1: text1, text2 1326 | # Compute diffs from text1 and text2. 1327 | text1 = a 1328 | diffs = self.diff_main(text1, b, True) 1329 | if len(diffs) > 2: 1330 | self.diff_cleanupSemantic(diffs) 1331 | self.diff_cleanupEfficiency(diffs) 1332 | elif isinstance(a, list) and b is None and c is None: 1333 | # Method 2: diffs 1334 | # Compute text1 from diffs. 1335 | diffs = a 1336 | text1 = self.diff_text1(diffs) 1337 | elif isinstance(a, str) and isinstance(b, list) and c is None: 1338 | # Method 3: text1, diffs 1339 | text1 = a 1340 | diffs = b 1341 | elif (isinstance(a, str) and isinstance(b, str) and 1342 | isinstance(c, list)): 1343 | # Method 4: text1, text2, diffs 1344 | # text2 is not used. 1345 | text1 = a 1346 | diffs = c 1347 | else: 1348 | raise ValueError("Unknown call format to patch_make.") 1349 | 1350 | if not diffs: 1351 | return [] # Get rid of the None case. 1352 | patches = [] 1353 | patch = patch_obj() 1354 | char_count1 = 0 # Number of characters into the text1 string. 1355 | char_count2 = 0 # Number of characters into the text2 string. 1356 | prepatch_text = text1 # Recreate the patches to determine context info. 1357 | postpatch_text = text1 1358 | for x in range(len(diffs)): 1359 | (diff_type, diff_text) = diffs[x] 1360 | if not patch.diffs and diff_type != self.DIFF_EQUAL: 1361 | # A new patch starts here. 1362 | patch.start1 = char_count1 1363 | patch.start2 = char_count2 1364 | if diff_type == self.DIFF_INSERT: 1365 | # Insertion 1366 | patch.diffs.append(diffs[x]) 1367 | patch.length2 += len(diff_text) 1368 | postpatch_text = (postpatch_text[:char_count2] + diff_text + 1369 | postpatch_text[char_count2:]) 1370 | elif diff_type == self.DIFF_DELETE: 1371 | # Deletion. 1372 | patch.length1 += len(diff_text) 1373 | patch.diffs.append(diffs[x]) 1374 | postpatch_text = (postpatch_text[:char_count2] + 1375 | postpatch_text[char_count2 + len(diff_text):]) 1376 | elif (diff_type == self.DIFF_EQUAL and 1377 | len(diff_text) <= 2 * self.Patch_Margin and 1378 | patch.diffs and len(diffs) != x + 1): 1379 | # Small equality inside a patch. 1380 | patch.diffs.append(diffs[x]) 1381 | patch.length1 += len(diff_text) 1382 | patch.length2 += len(diff_text) 1383 | 1384 | if (diff_type == self.DIFF_EQUAL and 1385 | len(diff_text) >= 2 * self.Patch_Margin): 1386 | # Time for a new patch. 
1387 | if patch.diffs: 1388 | self.patch_addContext(patch, prepatch_text) 1389 | patches.append(patch) 1390 | patch = patch_obj() 1391 | # Unlike Unidiff, our patch lists have a rolling context. 1392 | # https://github.com/google/diff-match-patch/wiki/Unidiff 1393 | # Update prepatch text & pos to reflect the application of the 1394 | # just completed patch. 1395 | prepatch_text = postpatch_text 1396 | char_count1 = char_count2 1397 | 1398 | # Update the current character count. 1399 | if diff_type != self.DIFF_INSERT: 1400 | char_count1 += len(diff_text) 1401 | if diff_type != self.DIFF_DELETE: 1402 | char_count2 += len(diff_text) 1403 | 1404 | # Pick up the leftover patch if not empty. 1405 | if patch.diffs: 1406 | self.patch_addContext(patch, prepatch_text) 1407 | patches.append(patch) 1408 | return patches 1409 | 1410 | def patch_deepCopy(self, patches): 1411 | """Given an array of patches, return another array that is identical. 1412 | 1413 | Args: 1414 | patches: Array of Patch objects. 1415 | 1416 | Returns: 1417 | Array of Patch objects. 1418 | """ 1419 | patchesCopy = [] 1420 | for patch in patches: 1421 | patchCopy = patch_obj() 1422 | # No need to deep copy the tuples since they are immutable. 1423 | patchCopy.diffs = patch.diffs[:] 1424 | patchCopy.start1 = patch.start1 1425 | patchCopy.start2 = patch.start2 1426 | patchCopy.length1 = patch.length1 1427 | patchCopy.length2 = patch.length2 1428 | patchesCopy.append(patchCopy) 1429 | return patchesCopy 1430 | 1431 | def patch_apply(self, patches, text): 1432 | """Merge a set of patches onto the text. Return a patched text, as well 1433 | as a list of true/false values indicating which patches were applied. 1434 | 1435 | Args: 1436 | patches: Array of Patch objects. 1437 | text: Old text. 1438 | 1439 | Returns: 1440 | Two element Array, containing the new text and an array of boolean values. 1441 | """ 1442 | if not patches: 1443 | return (text, []) 1444 | 1445 | # Deep copy the patches so that no changes are made to originals. 1446 | patches = self.patch_deepCopy(patches) 1447 | 1448 | nullPadding = self.patch_addPadding(patches) 1449 | text = nullPadding + text + nullPadding 1450 | self.patch_splitMax(patches) 1451 | 1452 | # delta keeps track of the offset between the expected and actual location 1453 | # of the previous patch. If there are patches expected at positions 10 and 1454 | # 20, but the first patch was found at 12, delta is 2 and the second patch 1455 | # has an effective expected position of 22. 1456 | delta = 0 1457 | results = [] 1458 | for patch in patches: 1459 | expected_loc = patch.start2 + delta 1460 | text1 = self.diff_text1(patch.diffs) 1461 | end_loc = -1 1462 | if len(text1) > self.Match_MaxBits: 1463 | # patch_splitMax will only provide an oversized pattern in the case of 1464 | # a monster delete. 1465 | start_loc = self.match_main(text, text1[:self.Match_MaxBits], 1466 | expected_loc) 1467 | if start_loc != -1: 1468 | end_loc = self.match_main(text, text1[-self.Match_MaxBits:], 1469 | expected_loc + len(text1) - self.Match_MaxBits) 1470 | if end_loc == -1 or start_loc >= end_loc: 1471 | # Can't find valid trailing context. Drop this patch. 1472 | start_loc = -1 1473 | else: 1474 | start_loc = self.match_main(text, text1, expected_loc) 1475 | if start_loc == -1: 1476 | # No match found. :( 1477 | results.append(False) 1478 | # Subtract the delta for this failed patch from subsequent patches. 1479 | delta -= patch.length2 - patch.length1 1480 | else: 1481 | # Found a match. 
:) 1482 | results.append(True) 1483 | delta = start_loc - expected_loc 1484 | if end_loc == -1: 1485 | text2 = text[start_loc : start_loc + len(text1)] 1486 | else: 1487 | text2 = text[start_loc : end_loc + self.Match_MaxBits] 1488 | if text1 == text2: 1489 | # Perfect match, just shove the replacement text in. 1490 | text = (text[:start_loc] + self.diff_text2(patch.diffs) + 1491 | text[start_loc + len(text1):]) 1492 | else: 1493 | # Imperfect match. 1494 | # Run a diff to get a framework of equivalent indices. 1495 | diffs = self.diff_main(text1, text2, False) 1496 | if (len(text1) > self.Match_MaxBits and 1497 | self.diff_levenshtein(diffs) / float(len(text1)) > 1498 | self.Patch_DeleteThreshold): 1499 | # The end points match, but the content is unacceptably bad. 1500 | results[-1] = False 1501 | else: 1502 | self.diff_cleanupSemanticLossless(diffs) 1503 | index1 = 0 1504 | for (op, data) in patch.diffs: 1505 | if op != self.DIFF_EQUAL: 1506 | index2 = self.diff_xIndex(diffs, index1) 1507 | if op == self.DIFF_INSERT: # Insertion 1508 | text = text[:start_loc + index2] + data + text[start_loc + 1509 | index2:] 1510 | elif op == self.DIFF_DELETE: # Deletion 1511 | text = text[:start_loc + index2] + text[start_loc + 1512 | self.diff_xIndex(diffs, index1 + len(data)):] 1513 | if op != self.DIFF_DELETE: 1514 | index1 += len(data) 1515 | # Strip the padding off. 1516 | text = text[len(nullPadding):-len(nullPadding)] 1517 | return (text, results) 1518 | 1519 | def patch_addPadding(self, patches): 1520 | """Add some padding on text start and end so that edges can match 1521 | something. Intended to be called only from within patch_apply. 1522 | 1523 | Args: 1524 | patches: Array of Patch objects. 1525 | 1526 | Returns: 1527 | The padding string added to each side. 1528 | """ 1529 | paddingLength = self.Patch_Margin 1530 | nullPadding = "" 1531 | for x in range(1, paddingLength + 1): 1532 | nullPadding += chr(x) 1533 | 1534 | # Bump all the patches forward. 1535 | for patch in patches: 1536 | patch.start1 += paddingLength 1537 | patch.start2 += paddingLength 1538 | 1539 | # Add some padding on start of first diff. 1540 | patch = patches[0] 1541 | diffs = patch.diffs 1542 | if not diffs or diffs[0][0] != self.DIFF_EQUAL: 1543 | # Add nullPadding equality. 1544 | diffs.insert(0, (self.DIFF_EQUAL, nullPadding)) 1545 | patch.start1 -= paddingLength # Should be 0. 1546 | patch.start2 -= paddingLength # Should be 0. 1547 | patch.length1 += paddingLength 1548 | patch.length2 += paddingLength 1549 | elif paddingLength > len(diffs[0][1]): 1550 | # Grow first equality. 1551 | extraLength = paddingLength - len(diffs[0][1]) 1552 | newText = nullPadding[len(diffs[0][1]):] + diffs[0][1] 1553 | diffs[0] = (diffs[0][0], newText) 1554 | patch.start1 -= extraLength 1555 | patch.start2 -= extraLength 1556 | patch.length1 += extraLength 1557 | patch.length2 += extraLength 1558 | 1559 | # Add some padding on end of last diff. 1560 | patch = patches[-1] 1561 | diffs = patch.diffs 1562 | if not diffs or diffs[-1][0] != self.DIFF_EQUAL: 1563 | # Add nullPadding equality. 1564 | diffs.append((self.DIFF_EQUAL, nullPadding)) 1565 | patch.length1 += paddingLength 1566 | patch.length2 += paddingLength 1567 | elif paddingLength > len(diffs[-1][1]): 1568 | # Grow last equality. 
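# With the default Patch_Margin of 4 the padding string is
# '\x01\x02\x03\x04'; patch_apply wraps the target text in it on both sides so
# that patches touching the very start or end of the text still have context
# to match against, and strips it again before returning.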
1569 | extraLength = paddingLength - len(diffs[-1][1]) 1570 | newText = diffs[-1][1] + nullPadding[:extraLength] 1571 | diffs[-1] = (diffs[-1][0], newText) 1572 | patch.length1 += extraLength 1573 | patch.length2 += extraLength 1574 | 1575 | return nullPadding 1576 | 1577 | def patch_splitMax(self, patches): 1578 | """Look through the patches and break up any which are longer than the 1579 | maximum limit of the match algorithm. 1580 | Intended to be called only from within patch_apply. 1581 | 1582 | Args: 1583 | patches: Array of Patch objects. 1584 | """ 1585 | patch_size = self.Match_MaxBits 1586 | if not patch_size: 1587 | # Python has the option of not splitting strings due to its ability 1588 | # to handle integers of arbitrary precision. 1589 | return 1590 | for x in range(len(patches)): 1591 | if patches[x].length1 <= patch_size: 1592 | continue 1593 | bigpatch = patches[x] 1594 | # Remove the big old patch. 1595 | del patches[x] 1596 | x -= 1 1597 | start1 = bigpatch.start1 1598 | start2 = bigpatch.start2 1599 | precontext = '' 1600 | while bigpatch.diffs: 1601 | # Create one of several smaller patches. 1602 | patch = patch_obj() 1603 | empty = True 1604 | patch.start1 = start1 - len(precontext) 1605 | patch.start2 = start2 - len(precontext) 1606 | if precontext: 1607 | patch.length1 = patch.length2 = len(precontext) 1608 | patch.diffs.append((self.DIFF_EQUAL, precontext)) 1609 | 1610 | while (bigpatch.diffs and 1611 | patch.length1 < patch_size - self.Patch_Margin): 1612 | (diff_type, diff_text) = bigpatch.diffs[0] 1613 | if diff_type == self.DIFF_INSERT: 1614 | # Insertions are harmless. 1615 | patch.length2 += len(diff_text) 1616 | start2 += len(diff_text) 1617 | patch.diffs.append(bigpatch.diffs.pop(0)) 1618 | empty = False 1619 | elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and 1620 | patch.diffs[0][0] == self.DIFF_EQUAL and 1621 | len(diff_text) > 2 * patch_size): 1622 | # This is a large deletion. Let it pass in one chunk. 1623 | patch.length1 += len(diff_text) 1624 | start1 += len(diff_text) 1625 | empty = False 1626 | patch.diffs.append((diff_type, diff_text)) 1627 | del bigpatch.diffs[0] 1628 | else: 1629 | # Deletion or equality. Only take as much as we can stomach. 1630 | diff_text = diff_text[:patch_size - patch.length1 - 1631 | self.Patch_Margin] 1632 | patch.length1 += len(diff_text) 1633 | start1 += len(diff_text) 1634 | if diff_type == self.DIFF_EQUAL: 1635 | patch.length2 += len(diff_text) 1636 | start2 += len(diff_text) 1637 | else: 1638 | empty = False 1639 | 1640 | patch.diffs.append((diff_type, diff_text)) 1641 | if diff_text == bigpatch.diffs[0][1]: 1642 | del bigpatch.diffs[0] 1643 | else: 1644 | bigpatch.diffs[0] = (bigpatch.diffs[0][0], 1645 | bigpatch.diffs[0][1][len(diff_text):]) 1646 | 1647 | # Compute the head context for the next patch. 1648 | precontext = self.diff_text2(patch.diffs) 1649 | precontext = precontext[-self.Patch_Margin:] 1650 | # Append the end context for this patch. 1651 | postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin] 1652 | if postcontext: 1653 | patch.length1 += len(postcontext) 1654 | patch.length2 += len(postcontext) 1655 | if patch.diffs and patch.diffs[-1][0] == self.DIFF_EQUAL: 1656 | patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] + 1657 | postcontext) 1658 | else: 1659 | patch.diffs.append((self.DIFF_EQUAL, postcontext)) 1660 | 1661 | if not empty: 1662 | x += 1 1663 | patches.insert(x, patch) 1664 | 1665 | 1666 | class patch_obj: 1667 | """Class representing one patch operation. 
1668 | """ 1669 | __slots__ = ('diffs', 'start1', 'start2', 'length1', 'length2') 1670 | 1671 | def __init__(self): 1672 | """Initializes with an empty list of diffs. 1673 | """ 1674 | self.diffs = [] 1675 | self.start1: int | None = None 1676 | self.start2: int | None = None 1677 | self.length1 = 0 1678 | self.length2 = 0 1679 | -------------------------------------------------------------------------------- /osm_revert/dmp_utils.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Collection, Sequence 2 | from typing import cast 3 | 4 | from sentry_sdk import trace 5 | 6 | from osm_revert.context_logger import context_print 7 | from osm_revert.diff_match_patch import diff_match_patch 8 | 9 | 10 | def dmp_retry_reverse(old: Collection[str], new: Sequence[str], current: Collection[str]) -> list[str] | None: 11 | if result := dmp(old, new, current): 12 | return result 13 | context_print('[DMP] Retrying in reverse') 14 | return dmp(old, new[::-1], current) 15 | 16 | 17 | @trace 18 | def dmp(old: Collection[str], new: Collection[str], current: Collection[str]) -> list[str] | None: 19 | old_lines = '\n'.join(old) + '\n' 20 | new_lines = '\n'.join(new) + '\n' 21 | current_lines = '\n'.join(current) + '\n' 22 | 23 | d = diff_match_patch() 24 | d.Match_Threshold = 1 25 | d.Patch_DeleteThreshold = 0 26 | 27 | (old_text, new_text, current_text, line_arr) = d.diff_linesToChars(old_lines, new_lines, current_lines) 28 | diff = d.diff_main(new_text, current_text, checklines=False) 29 | patch = d.patch_make(diff) 30 | 31 | result_text, result_bools = d.patch_apply(patch, old_text) 32 | 33 | # some patches failed to apply 34 | if not all(result_bools): 35 | context_print('[DMP] Patch failed (not_all)') 36 | return None 37 | 38 | result_lines = cast(str, d.diff_charsToLinesText(result_text, line_arr)) 39 | result = result_lines.strip().split('\n') 40 | 41 | # result must not contain duplicates 42 | if len(result) != len(set(result)): 43 | context_print('[DMP] Patch failed (duplicate)') 44 | return None 45 | 46 | result_set = set(result) 47 | old_set = set(old) 48 | 49 | # result must not create any new elements 50 | if result_set - old_set.union(current): 51 | context_print('[DMP] Patch failed (create_new)') 52 | return None 53 | 54 | # result must not delete any common elements 55 | if old_set.intersection(current) - result_set: 56 | context_print('[DMP] Patch failed (common_delete)') 57 | return None 58 | 59 | return result 60 | -------------------------------------------------------------------------------- /osm_revert/invert.py: -------------------------------------------------------------------------------- 1 | import json 2 | from collections import Counter 3 | from collections.abc import Iterable 4 | from copy import deepcopy 5 | from typing import TypedDict 6 | 7 | from sentry_sdk import trace 8 | 9 | from osm_revert.context_logger import context_print 10 | from osm_revert.diff_entry import DiffEntry 11 | from osm_revert.dmp_utils import dmp_retry_reverse 12 | from osm_revert.utils import ensure_iterable 13 | 14 | StatisticsDict = TypedDict( 15 | 'StatisticsDict', 16 | { 17 | 'fix:node': int, 18 | 'fix:way': int, 19 | 'fix:relation': int, 20 | 'dmp:way': int, 21 | 'dmp:way:id': list[str | int], 22 | 'dmp:relation': int, 23 | 'dmp:relation:id': list[str | int], 24 | 'dmp:fail:way': int, 25 | 'dmp:fail:way:id': list[str | int], 26 | 'dmp:fail:relation': int, 27 | 'dmp:fail:relation:id': list[str | int], 28 | }, 29 | ) 30 | 31 | 32 | 
class Inverter: 33 | __slots__ = ('_current_map', '_only_tags', '_run_counter', '_version_map', 'statistics', 'warnings') 34 | 35 | def __init__(self, only_tags: frozenset[str]) -> None: 36 | self._only_tags = only_tags 37 | 38 | # we need this to make reverting multiple changesets at a time possible 39 | self._current_map = {'node': {}, 'way': {}, 'relation': {}} 40 | 41 | # store latest versions of elements (for osmChange upload) 42 | self._version_map = {'node': {}, 'way': {}, 'relation': {}} 43 | 44 | self._run_counter: Counter[str] = Counter() 45 | 46 | self.statistics: StatisticsDict = { 47 | 'fix:node': 0, 48 | 'fix:way': 0, 49 | 'fix:relation': 0, 50 | 'dmp:way': 0, 51 | 'dmp:way:id': [], 52 | 'dmp:relation': 0, 53 | 'dmp:relation:id': [], 54 | 'dmp:fail:way': 0, 55 | 'dmp:fail:way:id': [], 56 | 'dmp:fail:relation': 0, 57 | 'dmp:fail:relation:id': [], 58 | } 59 | 60 | self.warnings: dict[str, list[str]] = {'node': [], 'way': [], 'relation': []} 61 | 62 | def _should_print(self, name: str, limit: int) -> bool: 63 | self._run_counter.update((name,)) 64 | current = self._run_counter[name] 65 | if current == limit + 1: 66 | context_print(f'🔇 Suppressing further messages for {name!r}') 67 | return current <= limit 68 | 69 | @trace 70 | def invert_diff(self, diff: dict[str, list[DiffEntry]]) -> dict[str, list]: 71 | for element_type, elements in diff.items(): 72 | for entry in elements: 73 | element_id = entry.element_id 74 | old = entry.element_old 75 | new = entry.element_new 76 | current = entry.element_current 77 | 78 | if element_id not in self._version_map[element_type]: 79 | self._version_map[element_type][element_id] = current['@version'] 80 | 81 | last_current = self._current_map[element_type].get(element_id, None) 82 | if last_current is not None: 83 | current = deepcopy(last_current) 84 | 85 | _set_visible_original((current, new, old), current) 86 | 87 | self._invert_element(element_type, element_id, old, new, current) 88 | 89 | result = { 90 | element_type: list(element_id_map.values()) # 91 | for element_type, element_id_map in self._current_map.items() 92 | } 93 | 94 | for element_type, elements in result.items(): 95 | for element in elements.copy(): 96 | # restore latest version number (for valid osmChange) 97 | element['@version'] = self._version_map[element_type][element['@id']] 98 | 99 | # don't delete already deleted elements (this may happen during multiple changesets) 100 | if element['@visible'] == 'false' and element['@visible:original'] == 'false': 101 | elements.remove(element) 102 | else: 103 | del element['@visible:original'] 104 | 105 | # convert [a, b, c] to 'a;b;c' 106 | for key, value in self.statistics.items(): 107 | if value and isinstance(value, list | tuple): 108 | self.statistics[key] = ';'.join(value) 109 | 110 | return result 111 | 112 | def _invert_element(self, element_type: str, element_id: str, old: dict, new: dict, current: dict) -> None: 113 | # create 114 | if (not old or old['@visible'] == 'false') and new['@visible'] == 'true': 115 | # ignore only_tags mode 116 | if self._only_tags: 117 | return 118 | 119 | # absolute delete 120 | if current['@visible'] == 'true': 121 | current['@visible'] = 'false' 122 | self._current_map[element_type][element_id] = current 123 | 124 | # modify 125 | elif old['@visible'] == 'true' and new['@visible'] == 'true': 126 | # simple revert; only_tags mode requires advanced revert 127 | if current['@version'] == new['@version'] and not self._only_tags: 128 | self._current_map[element_type][element_id] = old 129 | 
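# Advanced revert (described below): taken when the element was edited again after the
# reverted changeset (or when only_tags mode is active), so the old version cannot simply be
# restored; instead, the inverse of the tag/position/node/member changes is merged into the
# current version.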
130 | # advanced revert (element currently is not deleted) 131 | elif current['@visible'] == 'true': 132 | if self._should_print('advanced revert', 50): 133 | context_print(f'🛠️ Performing advanced revert on {element_type}/{element_id}') 134 | 135 | self.statistics[f'fix:{element_type}'] += 1 136 | 137 | current['tag'] = ensure_iterable(current.get('tag', ())) 138 | current_original = deepcopy(current) 139 | 140 | self._invert_tags(old, new, current) 141 | 142 | if not self._only_tags: 143 | if element_type == 'node': 144 | self._invert_node_position(old, new, current) 145 | elif element_type == 'way': 146 | self._invert_way_nodes(old, new, current) 147 | elif element_type == 'relation': 148 | self._invert_relation_members(old, new, current) 149 | else: 150 | raise NotImplementedError(f'Unknown element type: {element_type}') 151 | 152 | if current != current_original: 153 | self._current_map[element_type][element_id] = current 154 | 155 | # delete 156 | elif old['@visible'] == 'true' and new['@visible'] == 'false': 157 | # ignore only_tags mode 158 | if self._only_tags: 159 | return 160 | 161 | # do not restore repeatedly deleted elements 162 | if current['@version'] == new['@version']: 163 | self._current_map[element_type][element_id] = old 164 | 165 | else: 166 | raise Exception(f'Invalid state: {old!r}, {new!r}') 167 | 168 | def _invert_tags(self, old: dict, new: dict, current: dict) -> None: 169 | old_tags = {d['@k']: d['@v'] for d in ensure_iterable(old.get('tag', ()))} 170 | new_tags = {d['@k']: d['@v'] for d in ensure_iterable(new.get('tag', ()))} 171 | current_tags = {d['@k']: d['@v'] for d in ensure_iterable(current.get('tag', ()))} 172 | 173 | self._invert_tags_create(old_tags, new_tags, current_tags) 174 | self._invert_tags_modify(old_tags, new_tags, current_tags) 175 | self._invert_tags_delete(old_tags, new_tags, current_tags) 176 | 177 | current['tag'] = tuple({'@k': k, '@v': v} for k, v in current_tags.items()) 178 | 179 | def _invert_tags_create(self, old_tags: dict, new_tags: dict, current_tags: dict) -> None: 180 | changed_items = set(new_tags.items()) - set(old_tags.items()) 181 | 182 | for key, value in changed_items: 183 | # ignore only_tags mode 184 | if self._only_tags and key not in self._only_tags: 185 | continue 186 | 187 | # ignore modified 188 | if key in old_tags: 189 | continue 190 | 191 | # expect to be new value 192 | if current_tags.get(key) != value: 193 | continue 194 | 195 | del current_tags[key] 196 | 197 | def _invert_tags_modify(self, old_tags: dict, new_tags: dict, current_tags: dict) -> None: 198 | changed_items = set(new_tags.items()) - set(old_tags.items()) 199 | 200 | for key, value in changed_items: 201 | # ignore only_tags mode 202 | if self._only_tags and key not in self._only_tags: 203 | continue 204 | 205 | # ignore created 206 | if key not in old_tags: 207 | continue 208 | 209 | # expect to be new value 210 | if current_tags.get(key) != value: 211 | continue 212 | 213 | current_tags[key] = old_tags[key] 214 | 215 | def _invert_tags_delete(self, old_tags: dict, new_tags: dict, current_tags: dict) -> None: 216 | changed_items = set(old_tags.items()) - set(new_tags.items()) 217 | 218 | for key, value in changed_items: 219 | # ignore only_tags mode 220 | if self._only_tags and key not in self._only_tags: 221 | continue 222 | 223 | # ignore modified 224 | if key in new_tags: 225 | continue 226 | 227 | # expect to be deleted 228 | if current_tags.get(key) is not None: 229 | continue 230 | 231 | current_tags[key] = value 232 | 233 | def 
_invert_node_position(self, old: dict, new: dict, current: dict) -> None: 234 | # ignore unmoved 235 | if old['@lat'] == new['@lat'] and old['@lon'] == new['@lon']: 236 | return 237 | 238 | # expect to be at new location 239 | if current['@lat'] != new['@lat'] or current['@lon'] != new['@lon']: 240 | return 241 | 242 | current['@lat'] = old['@lat'] 243 | current['@lon'] = old['@lon'] 244 | 245 | def _invert_way_nodes(self, old: dict, new: dict, current: dict) -> None: 246 | old_nodes = tuple(json.dumps(n) for n in ensure_iterable(old.get('nd', ()))) 247 | new_nodes = tuple(json.dumps(n) for n in ensure_iterable(new.get('nd', ()))) 248 | current_nodes = tuple(json.dumps(n) for n in ensure_iterable(current.get('nd', ()))) 249 | 250 | # ignore unmodified 251 | if old_nodes == new_nodes: 252 | return 253 | 254 | # already reverted 255 | if current_nodes != new_nodes and not set(old_nodes).symmetric_difference(current_nodes): 256 | return 257 | 258 | # simple revert if no more edits 259 | if current_nodes == new_nodes: 260 | current['nd'] = old['nd'] 261 | return 262 | 263 | context_print(f'💡 Performing DMP patch on way/{new["@id"]}') 264 | 265 | if patch := dmp_retry_reverse(old_nodes, new_nodes, current_nodes): 266 | current['nd'] = tuple(json.loads(p) for p in patch) 267 | context_print('[DMP][☑️] Patch successful') 268 | self.statistics['dmp:way'] += 1 269 | self.statistics['dmp:way:id'].append(new['@id']) 270 | else: 271 | # absolute delete 272 | create_diff = {n['@ref'] for n in ensure_iterable(new.get('nd', ()))} 273 | create_diff = create_diff.difference(n['@ref'] for n in ensure_iterable(old.get('nd', ()))) 274 | current['nd'] = tuple( 275 | n # 276 | for n in ensure_iterable(current.get('nd', ())) 277 | if n['@ref'] not in create_diff 278 | ) 279 | 280 | self.statistics['dmp:fail:way'] += 1 281 | self.statistics['dmp:fail:way:id'].append(new['@id']) 282 | self.warnings['way'].append(new['@id']) 283 | 284 | def _invert_relation_members(self, old: dict, new: dict, current: dict) -> None: 285 | old_members = tuple(json.dumps(m) for m in ensure_iterable(old.get('member', ()))) 286 | new_members = tuple(json.dumps(m) for m in ensure_iterable(new.get('member', ()))) 287 | current_members = tuple(json.dumps(m) for m in ensure_iterable(current.get('member', ()))) 288 | 289 | # ignore unmodified 290 | if old_members == new_members: 291 | return 292 | 293 | # already reverted 294 | if current_members != new_members and set(old_members) == set(current_members): 295 | return 296 | 297 | # simple revert if no more edits 298 | if current_members == new_members: 299 | current['member'] = old['member'] 300 | return 301 | 302 | context_print(f'💡 Performing DMP patch relation/{new["@id"]}') 303 | 304 | if patch := dmp_retry_reverse(old_members, new_members, current_members): 305 | current['member'] = tuple(json.loads(p) for p in patch) 306 | context_print('✅ Patch successful') 307 | self.statistics['dmp:relation'] += 1 308 | self.statistics['dmp:relation:id'].append(new['@id']) 309 | else: 310 | # absolute delete 311 | create_diff = {m['@ref'] for m in ensure_iterable(new.get('member', ()))} 312 | create_diff = create_diff.difference(m['@ref'] for m in ensure_iterable(old.get('member', ()))) 313 | current['member'] = tuple( 314 | m # 315 | for m in ensure_iterable(current.get('member', ())) 316 | if m['@ref'] not in create_diff 317 | ) 318 | 319 | self.statistics['dmp:fail:relation'] += 1 320 | self.statistics['dmp:fail:relation:id'].append(new['@id']) 321 | self.warnings['relation'].append(new['@id']) 
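# _set_visible_original (below) stamps the element's pre-run visibility onto the old/new/current
# copies as '@visible:original'; invert_diff later uses this to avoid re-deleting elements that
# were already deleted, which matters when several changesets are reverted in one run.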
322 | 323 | 324 | def _set_visible_original(targets: Iterable[dict | None], current: dict) -> None: 325 | visible_original = current.get('@visible:original', current['@visible']) 326 | 327 | for target in targets: 328 | if target is not None and '@visible:original' not in target: 329 | target['@visible:original'] = visible_original 330 | -------------------------------------------------------------------------------- /osm_revert/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import traceback 4 | from collections.abc import Sequence 5 | from functools import wraps 6 | 7 | import uvloop 8 | import xmltodict 9 | from pydantic import SecretStr 10 | from sentry_sdk import capture_exception, trace 11 | 12 | from osm_revert.config import CHANGESETS_LIMIT_CONFIG, CHANGESETS_LIMIT_MODERATOR_REVERT, CREATED_BY, OSM_URL, WEBSITE 13 | from osm_revert.context_logger import context_print 14 | from osm_revert.diff_entry import DiffEntry 15 | from osm_revert.invert import Inverter 16 | from osm_revert.osm import OsmApi, build_osm_change 17 | from osm_revert.overpass import Overpass 18 | from osm_revert.utils import is_osm_moderator 19 | 20 | 21 | @trace 22 | def merge_and_sort_diffs(diffs: Sequence[dict[str, list[DiffEntry]]]) -> dict[str, list[DiffEntry]]: 23 | if not diffs: 24 | return {} 25 | 26 | result = diffs[0] 27 | 28 | for diff in diffs[1:]: 29 | for element_type, elements in diff.items(): 30 | result[element_type] += elements 31 | 32 | for element_type, elements in result.items(): 33 | # sort by newest edits first 34 | result[element_type] = sorted(elements, key=lambda t: t.timestamp, reverse=True) 35 | 36 | return result 37 | 38 | 39 | def filter_discussion_changesets(changeset_ids: Sequence[int], target: str) -> Sequence[int]: 40 | if target == 'all': 41 | return changeset_ids 42 | if target == 'newest': 43 | return changeset_ids[-1:] 44 | if target == 'oldest': 45 | return changeset_ids[:1] 46 | context_print(f'🚧 Warning: Unknown discussion target: {target}') 47 | return () 48 | 49 | 50 | def print_warn_elements(warn_elements: dict[str, list[str]]) -> None: 51 | for element_type, element_ids in warn_elements.items(): 52 | for element_id in element_ids: 53 | context_print(f'⚠️ Please verify: {OSM_URL}/{element_type}/{element_id}') 54 | 55 | 56 | def main_timer(func): 57 | @wraps(func) 58 | async def wrapper(*args, **kwargs): 59 | start_time = time.perf_counter() 60 | 61 | try: 62 | exit_code = await func(*args, **kwargs) 63 | except Exception as e: 64 | capture_exception(e) 65 | context_print(traceback.format_exc()) 66 | exit_code = -2 67 | 68 | total_time = time.perf_counter() - start_time 69 | context_print(f'🏁 Total time: {total_time:.1F} sec') 70 | return exit_code 71 | 72 | return wrapper 73 | 74 | 75 | # TODO: improved revert to date 76 | # TODO: filter does not include nodes if way was unmodified 77 | # https://overpass-api.de/achavi/?changeset=131696060 78 | # https://www.openstreetmap.org/way/357241890/history 79 | 80 | 81 | # TODO: util function to ensure tags existence and type 82 | # TODO: slow but very accurate revert (download full history of future edits); overpass-api: timeline 83 | # TODO: dataclasses 84 | @main_timer 85 | @trace 86 | async def main( 87 | changeset_ids: Sequence[int], 88 | comment: str, 89 | *, 90 | access_token: SecretStr, 91 | discussion: str = '', 92 | discussion_target: str = 'all', 93 | osc_file: str | None = None, 94 | print_osc: bool | None = None, 95 | query_filter: str = '', 
96 | only_tags: Sequence[str] = (), 97 | fix_parents: bool = True, 98 | ) -> int: 99 | if not changeset_ids: 100 | raise ValueError('Missing changeset ids') 101 | 102 | changeset_ids = tuple(sorted(set(changeset_ids))) 103 | only_tags_set = frozenset(tag.strip() for tag in only_tags if tag) 104 | 105 | context_print('🔒️ Logging in to OpenStreetMap') 106 | osm = OsmApi(access_token) 107 | user = await osm.get_authorized_user() 108 | 109 | user_edits = user['changesets']['count'] 110 | user_is_moderator = is_osm_moderator(user['roles']) 111 | 112 | context_print(f'👤 Welcome, {user["display_name"]}{" 🔷" if user_is_moderator else ""}!') 113 | 114 | if user['display_name'] != 'NorthCrab': 115 | changesets_limit_config = CHANGESETS_LIMIT_CONFIG['moderator' if user_is_moderator else ''] 116 | changesets_limit = max(v for k, v in changesets_limit_config.items() if k <= user_edits) 117 | 118 | if changesets_limit == 0: 119 | min_edits = min(k for k in changesets_limit_config if k > 0) 120 | context_print(f'🐥 You need to make at least {min_edits} edits to use this tool') 121 | return -1 122 | 123 | if changesets_limit < len(changeset_ids): 124 | context_print(f'🛟 For safety, you can only revert up to {changesets_limit} changesets at a time') 125 | 126 | if limit_increase := min((k for k in changesets_limit_config if k > user_edits), default=None): 127 | context_print(f'🛟 To increase this limit, make at least {limit_increase} edits') 128 | 129 | return -1 130 | 131 | overpass = Overpass() 132 | diffs = [] 133 | 134 | for changeset_id in changeset_ids: 135 | context_print(f'☁️ Downloading changeset {changeset_id}') 136 | 137 | context_print('[1/?] OpenStreetMap …') 138 | changeset = await osm.get_changeset(changeset_id) 139 | 140 | if user_edits < CHANGESETS_LIMIT_MODERATOR_REVERT and not user_is_moderator: 141 | changeset_user = await osm.get_user(changeset['osm']['changeset']['@uid']) 142 | if changeset_user and is_osm_moderator(changeset_user['roles']): 143 | context_print('🛑 Moderators changesets cannot be reverted') 144 | return -1 145 | 146 | changeset_size = sum(len(v) for p in changeset['partition'].values() for v in p.values()) 147 | partition_count = len(changeset['partition']) 148 | steps = partition_count + 1 149 | 150 | context_print(f'[1/{steps}] OpenStreetMap: {changeset_size} element{"s" if changeset_size > 1 else ""}') 151 | 152 | if changeset_size: 153 | if partition_count > 2: 154 | context_print(f'[2/{steps}] Overpass ({partition_count} partitions, this may take a while) …') 155 | else: 156 | context_print( 157 | f'[2/{steps}] Overpass ({partition_count} partition{"s" if partition_count > 1 else ""}) …' 158 | ) 159 | 160 | diff = await overpass.get_changeset_elements_history(changeset, steps, query_filter) 161 | if not diff: 162 | return -1 163 | 164 | diffs.append(diff) 165 | diff_size = sum(len(el) for el in diff.values()) 166 | 167 | if diff_size > changeset_size: 168 | raise RuntimeError(f'Diff must not be larger than changeset size: {diff_size=}, {changeset_size=}') 169 | 170 | if query_filter: 171 | context_print( 172 | f'[{steps}/{steps}] Overpass: {diff_size} element{"s" if diff_size > 1 else ""} (🪣 filtered)' 173 | ) 174 | else: 175 | context_print(f'[{steps}/{steps}] Overpass: {diff_size} element{"s" if diff_size > 1 else ""}') 176 | 177 | context_print('🔁 Generating a revert') 178 | merged_diffs = merge_and_sort_diffs(diffs) 179 | 180 | inverter = Inverter(only_tags_set) 181 | invert = inverter.invert_diff(merged_diffs) 182 | 183 | parents_counter = await 
overpass.update_parents(invert, fix_parents=fix_parents) 184 | if parents_counter: 185 | if fix_parents: 186 | context_print(f'🛠️ Fixing {parents_counter} parent{"s" if parents_counter > 1 else ""}') 187 | else: 188 | context_print(f'🛠️ Skipping {parents_counter} element{"s" if parents_counter > 1 else ""} (not orphaned)') 189 | 190 | invert_size = sum(len(elements) for elements in invert.values()) 191 | if invert_size == 0: 192 | context_print('✅ Nothing to revert') 193 | return 0 194 | 195 | if osc_file or print_osc: 196 | context_print(f'💾 Saving {invert_size} change{"s" if invert_size > 1 else ""} to .osc') 197 | osm_change = build_osm_change(invert, changeset_id=None) 198 | osm_change_xml = xmltodict.unparse(osm_change, pretty=True) 199 | 200 | if osc_file: 201 | with open(osc_file, 'w', encoding='utf-8') as f: 202 | f.write(osm_change_xml) 203 | 204 | if print_osc: 205 | context_print('') 206 | context_print(osm_change_xml) 207 | context_print('') 208 | 209 | print_warn_elements(inverter.warnings) 210 | context_print('✅ Success') 211 | return 0 212 | 213 | else: 214 | changeset_max_size = await osm.get_changeset_max_size() 215 | 216 | if invert_size > changeset_max_size: 217 | context_print(f'🐘 Revert is too big: {invert_size} > {changeset_max_size}') 218 | if len(changeset_ids) > 1: 219 | context_print('🐘 Hint: Try reducing the amount of changesets to revert at once') 220 | if fix_parents: 221 | context_print('🐘 Hint: Try disabling parent fixing') 222 | return -1 223 | 224 | context_print(f'🌍️ Uploading {invert_size} change{"s" if invert_size > 1 else ""}') 225 | extra_args = {'changesets_count': user_edits + 1, 'created_by': CREATED_BY, 'host': WEBSITE} 226 | 227 | if len(changeset_ids) == 1: 228 | extra_args['id'] = ';'.join(f'{OSM_URL}/changeset/{c}' for c in changeset_ids) 229 | else: 230 | extra_args['id'] = ';'.join(map(str, changeset_ids)) 231 | 232 | if query_filter: 233 | extra_args['filter'] = query_filter 234 | 235 | if changeset_id := await osm.upload_diff(invert, comment, extra_args | inverter.statistics): 236 | changeset_url = f'{OSM_URL}/changeset/{changeset_id}' 237 | 238 | discussion = discussion.strip() 239 | 240 | if len(discussion) >= 4: # prevent accidental discussions 241 | discussion += f'\n\n{changeset_url}' 242 | 243 | discuss_changeset_ids = filter_discussion_changesets(changeset_ids, discussion_target) 244 | context_print( 245 | f'💬 Discussing {len(discuss_changeset_ids)} changeset{"s" if len(discuss_changeset_ids) > 1 else ""}' 246 | ) 247 | 248 | for i, changeset_id in enumerate(discuss_changeset_ids, 1): 249 | status = await osm.post_discussion_comment(changeset_id, discussion) 250 | context_print(f'[{i}/{len(discuss_changeset_ids)}] Changeset {changeset_id}: {status}') 251 | 252 | print_warn_elements(inverter.warnings) 253 | context_print('✅ Success') 254 | context_print(f'✅ {changeset_url}') 255 | return 0 256 | 257 | return -1 258 | 259 | 260 | if __name__ == '__main__': 261 | # For debugging 262 | uvloop.run( 263 | main( 264 | changeset_ids=[124750619], 265 | comment='revert', 266 | print_osc=True, 267 | query_filter='', 268 | fix_parents=True, 269 | access_token=SecretStr(os.environ['OSM_TOKEN']), 270 | ) 271 | ) 272 | -------------------------------------------------------------------------------- /osm_revert/osm.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Collection 2 | from typing import Any 3 | 4 | import xmltodict 5 | from pydantic import SecretStr 6 | from sentry_sdk 
import trace 7 | 8 | from osm_revert.config import CREATED_BY, NO_TAG_PREFIX, OSM_API_URL, TAG_MAX_LENGTH, TAG_PREFIX 9 | from osm_revert.context_logger import context_print 10 | from osm_revert.utils import ensure_iterable, get_http_client, retry_exponential 11 | 12 | 13 | @trace 14 | def sort_relations_for_osm_change(relations: Collection[dict]) -> list[dict]: 15 | change_ids = {rel['@id'] for rel in relations} 16 | 17 | # tuples: (relation, set of relation ids it depends on) 18 | dependency_state = { 19 | rel['@id']: ( 20 | rel, 21 | change_ids.intersection( 22 | m['@ref'] # 23 | for m in ensure_iterable(rel.get('member', ())) 24 | if m['@type'] == 'relation' 25 | ), 26 | ) 27 | for rel in relations 28 | } 29 | 30 | no_dependencies = [] 31 | 32 | for rel_id, (rel, deps) in tuple(dependency_state.items()): 33 | if not deps: 34 | no_dependencies.append(rel) 35 | dependency_state.pop(rel_id) 36 | 37 | result = [] 38 | hidden = [] 39 | 40 | while no_dependencies: 41 | rel = no_dependencies.pop() 42 | rel_id = rel['@id'] 43 | 44 | if rel['@visible'] == 'true': 45 | result.append(rel) 46 | else: 47 | hidden.append(rel) 48 | 49 | for other_rel_id, (other_rel, deps) in tuple(dependency_state.items()): 50 | if rel_id in deps: 51 | deps.remove(rel_id) 52 | 53 | if not deps: 54 | no_dependencies.append(other_rel) 55 | dependency_state.pop(other_rel_id) 56 | 57 | # delete relations with most dependencies first 58 | result.extend(reversed(hidden)) 59 | 60 | for rel, deps in dependency_state.values(): 61 | context_print(f'🚧 Warning: relation/{rel["@id"]} has {len(deps)} circular dependencies') 62 | result.append(rel) 63 | 64 | return result 65 | 66 | 67 | @trace 68 | def build_osm_change(diff: dict, changeset_id: str | None) -> dict: 69 | result = { 70 | 'osmChange': { 71 | '@version': 0.6, 72 | '@generator': CREATED_BY, 73 | 'modify': {'node': [], 'way': [], 'relation': []}, 74 | 'delete': {'relation': [], 'way': [], 'node': []}, 75 | } 76 | } 77 | 78 | for element_type, elements in diff.items(): 79 | if element_type == 'relation': 80 | elements = sort_relations_for_osm_change(elements) 81 | 82 | for element in elements: 83 | if changeset_id: 84 | element['@changeset'] = changeset_id 85 | else: 86 | del element['@changeset'] 87 | 88 | if element['@visible'] == 'true': 89 | action = 'modify' 90 | else: 91 | action = 'delete' 92 | element.pop('@lat', None) 93 | element.pop('@lon', None) 94 | element.pop('tag', None) 95 | element.pop('nd', None) 96 | element.pop('member', None) 97 | 98 | result['osmChange'][action][element_type].append(element) 99 | 100 | return result 101 | 102 | 103 | class OsmApi: 104 | def __init__(self, access_token: SecretStr): 105 | self._http = get_http_client( 106 | f'{OSM_API_URL}/api', 107 | headers={'Authorization': f'Bearer {access_token.get_secret_value()}'}, 108 | ) 109 | 110 | @retry_exponential 111 | async def get_changeset_max_size(self) -> int: 112 | r = await self._http.get('/capabilities') 113 | r.raise_for_status() 114 | caps = xmltodict.parse(r.text) 115 | return int(caps['osm']['api']['changesets']['@maximum_elements']) 116 | 117 | @retry_exponential 118 | async def get_authorized_user(self) -> dict: 119 | r = await self._http.get('/0.6/user/details.json') 120 | r.raise_for_status() 121 | return r.json()['user'] 122 | 123 | @retry_exponential 124 | async def get_user(self, uid: str | int) -> dict | None: 125 | r = await self._http.get(f'/0.6/user/{uid}.json') 126 | # allow for not found users 127 | if r.status_code in (404, 410): 128 | return None 129 | 
r.raise_for_status() 130 | return r.json()['user'] 131 | 132 | @retry_exponential 133 | @trace 134 | async def get_changeset(self, changeset_id: int) -> dict: 135 | info_resp = await self._http.get(f'/0.6/changeset/{changeset_id}') 136 | info_resp.raise_for_status() 137 | 138 | diff_resp = await self._http.get(f'/0.6/changeset/{changeset_id}/download') 139 | diff_resp.raise_for_status() 140 | 141 | info = xmltodict.parse(info_resp.text) 142 | diff = xmltodict.parse(diff_resp.text) 143 | diff['partition'] = {} 144 | 145 | for action_type, affected_elements in diff['osmChange'].items(): 146 | if action_type.startswith('@'): 147 | continue 148 | 149 | new = {'node': [], 'way': [], 'relation': []} 150 | 151 | for affected_element in ensure_iterable(affected_elements): 152 | element_type, element = next(iter(affected_element.items())) 153 | 154 | new[element_type].append(element) 155 | 156 | if element['@timestamp'] not in diff['partition']: 157 | diff['partition'][element['@timestamp']] = {'node': [], 'way': [], 'relation': []} 158 | 159 | diff['partition'][element['@timestamp']][element_type].append(element['@id']) 160 | 161 | diff['osmChange'][action_type] = new 162 | 163 | return info | diff 164 | 165 | @trace 166 | async def upload_diff(self, diff: dict, comment: str, extra_tags: dict[str, Any]) -> str | None: 167 | if 'comment' in extra_tags: 168 | raise ValueError('comment is a reserved tag') 169 | 170 | extra_tags['comment'] = comment 171 | 172 | for key, value in tuple(extra_tags.items()): 173 | if key.startswith(TAG_PREFIX): 174 | raise ValueError(f'{key!r} is a reserved tag') 175 | 176 | if not value: 177 | del extra_tags[key] 178 | continue 179 | 180 | # stringify the value 181 | if not isinstance(value, str): 182 | value = str(value) 183 | extra_tags[key] = value 184 | 185 | # add revert: prefix if applicable 186 | if key not in NO_TAG_PREFIX: 187 | del extra_tags[key] 188 | key = f'{TAG_PREFIX}:{key}' 189 | extra_tags[key] = value 190 | 191 | # trim value if too long 192 | if len(value) > TAG_MAX_LENGTH: 193 | context_print( 194 | f'🚧 Warning: Trimming {key} value because it exceeds {TAG_MAX_LENGTH} characters: {value}' 195 | ) 196 | extra_tags[key] = value[:252] + '…' 197 | 198 | changeset = {'osm': {'changeset': {'tag': [{'@k': k, '@v': v} for k, v in extra_tags.items()]}}} 199 | changeset_xml = xmltodict.unparse(changeset) 200 | 201 | r = await self._http.put( 202 | '/0.6/changeset/create', 203 | content=changeset_xml, 204 | headers={'Content-Type': 'text/xml; charset=utf-8'}, 205 | ) 206 | r.raise_for_status() 207 | 208 | changeset_id = r.text 209 | osm_change = build_osm_change(diff, changeset_id) 210 | osm_change_xml = xmltodict.unparse(osm_change) 211 | 212 | upload_resp = await self._http.post( 213 | f'/0.6/changeset/{changeset_id}/upload', 214 | content=osm_change_xml, 215 | headers={'Content-Type': 'text/xml; charset=utf-8'}, 216 | timeout=150, 217 | ) 218 | 219 | r = await self._http.put(f'/0.6/changeset/{changeset_id}/close') 220 | r.raise_for_status() 221 | 222 | if upload_resp.status_code == 409: 223 | context_print(f'🆚 Failed to upload the changes ({upload_resp.status_code})') 224 | context_print(f'🆚 {upload_resp.text}') 225 | context_print('🆚 The Overpass data is outdated, please try again shortly') 226 | return None 227 | 228 | if upload_resp.status_code != 200: 229 | context_print(f'😵 Failed to upload the changes ({upload_resp.status_code})') 230 | context_print(f'😵 {upload_resp.text}') 231 | return None 232 | 233 | return changeset_id 234 | 235 | async def 
post_discussion_comment(self, changeset_id: int, comment: str) -> str: 236 | r = await self._http.post(f'/0.6/changeset/{changeset_id}/comment', data={'text': comment}) 237 | if r.is_success: 238 | return 'OK' 239 | if r.status_code in (429,): 240 | return 'RATE_LIMITED' 241 | return str(r.status_code) 242 | -------------------------------------------------------------------------------- /osm_revert/overpass.py: -------------------------------------------------------------------------------- 1 | import html 2 | import re 3 | from collections.abc import Iterable, Sequence 4 | from copy import deepcopy 5 | from datetime import UTC, datetime, timedelta 6 | from functools import lru_cache 7 | from itertools import chain, pairwise 8 | 9 | import xmltodict 10 | from httpx import AsyncClient 11 | from sentry_sdk import trace 12 | 13 | from osm_revert.config import OVERPASS_URLS, REVERT_TO_DATE 14 | from osm_revert.context_logger import context_print 15 | from osm_revert.diff_entry import DiffEntry 16 | from osm_revert.utils import ensure_iterable, get_http_client, retry_exponential 17 | 18 | _RE_REL_ALIAS = re.compile(r'\brel\b') 19 | _RE_NEG_ID_FILTER = re.compile(r'\(\s*!\s*id\s*:(?P(\s*(,\s*)?\d+)+)\s*\)') 20 | _RE_TYPE_SELECTOR_GREEDY = re.compile(r'.*\b(nwr|nw|nr|wr|node|way|relation)\b', flags=re.DOTALL) 21 | _RE_TYPE_SELECTOR = re.compile(r'\b(nwr|nw|nr|wr|node|way|relation)\b') 22 | _RE_XML_TAG = re.compile(r'<.*?>', flags=re.DOTALL) 23 | 24 | 25 | def parse_timestamp(timestamp: str) -> int: 26 | date_format = '%Y-%m-%dT%H:%M:%SZ' 27 | return int(datetime.strptime(timestamp, date_format).replace(tzinfo=UTC).timestamp()) 28 | 29 | 30 | def get_bbox(changeset: dict) -> str: 31 | e = changeset['osm']['changeset'] 32 | # some changesets don't have a bbox 33 | if '@min_lat' not in e: 34 | return '' 35 | min_lat, max_lat = e['@min_lat'], e['@max_lat'] 36 | min_lon, max_lon = e['@min_lon'], e['@max_lon'] 37 | return f'[bbox:{min_lat},{min_lon},{max_lat},{max_lon}]' 38 | 39 | 40 | def get_old_date(timestamp: str) -> str: 41 | date_format = '%Y-%m-%dT%H:%M:%SZ' 42 | date = datetime.strptime(timestamp, date_format).replace(tzinfo=UTC) 43 | created_at_minus_one = (date - timedelta(seconds=1)).strftime(date_format) 44 | return f'[date:"{created_at_minus_one}"]' 45 | 46 | 47 | def get_new_date(timestamp: str) -> str: 48 | return f'[date:"{timestamp}"]' 49 | 50 | 51 | def get_changeset_adiff(timestamp: str) -> str: 52 | if REVERT_TO_DATE is None: 53 | date_format = '%Y-%m-%dT%H:%M:%SZ' 54 | date = datetime.strptime(timestamp, date_format).replace(tzinfo=UTC) 55 | created_at_minus_one = (date - timedelta(seconds=1)).strftime(date_format) 56 | return f'[adiff:"{created_at_minus_one}","{timestamp}"]' 57 | else: 58 | return f'[adiff:"{REVERT_TO_DATE}","{timestamp}"]' 59 | 60 | 61 | def get_current_adiff(timestamp: str) -> str: 62 | return f'[adiff:"{timestamp}"]' 63 | 64 | 65 | @lru_cache(maxsize=128) 66 | def get_element_types_from_selector(selector: str) -> Sequence[str]: 67 | if selector in {'node', 'way', 'relation'}: 68 | return (selector,) 69 | result = [] 70 | if 'n' in selector: 71 | result.append('node') 72 | if 'w' in selector: 73 | result.append('way') 74 | if 'r' in selector: 75 | result.append('relation') 76 | return result 77 | 78 | 79 | @trace 80 | def build_query_filtered(element_ids: dict[str, list[str]], query_filter: str) -> str: 81 | # ensure valid query if no ids are present 82 | element_ids = element_ids.copy() 83 | for type_, invert_ids in element_ids.items(): 84 | if not invert_ids: 
85 | element_ids[type_] = ['-1'] 86 | 87 | implicit_query_way_children = bool(query_filter) 88 | 89 | # default everything query filter 90 | if not query_filter: 91 | query_filter = 'node;way;relation;' 92 | 93 | # ensure proper query ending 94 | if not query_filter.endswith(';'): 95 | query_filter += ';' 96 | 97 | # replace 'rel' alias with 'relation' 98 | for match in sorted( 99 | _RE_REL_ALIAS.finditer(query_filter), 100 | key=lambda m: m.start(), 101 | reverse=True, 102 | ): 103 | start, end = match.start(), match.end() 104 | query_filter = query_filter[:start] + 'relation' + query_filter[end:] 105 | 106 | # handle custom (!id:) 107 | for match in sorted( 108 | _RE_NEG_ID_FILTER.finditer(query_filter), 109 | key=lambda m: m.start(), 110 | reverse=True, 111 | ): 112 | start, end = match.start(), match.end() 113 | invert_ids = (i.strip() for i in match.group('id').split(',') if i.strip()) 114 | selector = _RE_TYPE_SELECTOR_GREEDY.match(query_filter[:start]).group(1) # pyright: ignore[reportOptionalMemberAccess] 115 | 116 | new_ids = set(chain.from_iterable(element_ids[et] for et in get_element_types_from_selector(selector))) 117 | new_ids = new_ids.difference(invert_ids) 118 | joined_new_ids = ','.join(new_ids) if new_ids else '-1' 119 | 120 | query_filter = query_filter[:start] + f'(id:{joined_new_ids})' + query_filter[end:] 121 | 122 | # apply element id filtering 123 | for match in sorted( 124 | _RE_TYPE_SELECTOR.finditer(query_filter), 125 | key=lambda m: m.start(), 126 | reverse=True, 127 | ): 128 | end = match.end() 129 | selector = match.group(1) 130 | 131 | joined_element_ids = ','.join( 132 | set(chain.from_iterable(element_ids[et] for et in get_element_types_from_selector(selector))) 133 | ) 134 | 135 | query_filter = query_filter[:end] + f'(id:{joined_element_ids})' + query_filter[end:] 136 | 137 | if implicit_query_way_children: 138 | return f'({query_filter});out meta;node(w);out meta;' 139 | else: 140 | return f'({query_filter});out meta;' 141 | 142 | 143 | def build_query_parents_by_ids(element_ids: dict) -> str: 144 | return ( 145 | f'node(id:{",".join(element_ids["node"]) if element_ids["node"] else "-1"})->.n;' 146 | f'way(id:{",".join(element_ids["way"]) if element_ids["way"] else "-1"})->.w;' 147 | f'rel(id:{",".join(element_ids["relation"]) if element_ids["relation"] else "-1"})->.r;' 148 | f'(way(bn.n);rel(bn.n);rel(bw.w);rel(br.r););' 149 | f'out meta;' 150 | ) 151 | 152 | 153 | @retry_exponential 154 | @trace 155 | async def fetch_overpass(http: AsyncClient, data: str, *, check_bad_request: bool = False) -> dict | str: 156 | r = await http.post('.', data={'data': data}, timeout=300) 157 | 158 | if check_bad_request and r.status_code == 400: 159 | s = r.text.find('') 160 | e = r.text.find('') 161 | if e > s > -1: 162 | body = r.text[s + 6 : e].strip() 163 | body = _RE_XML_TAG.sub('', body) 164 | lines = tuple( 165 | html.unescape(line.strip()[7:]) # 166 | for line in body.split('\n') 167 | if line.strip().startswith('Error: ') 168 | ) 169 | if lines: 170 | return '🛑 Overpass - Bad Request:\n' + '\n'.join(f'🛑 {line}' for line in lines) 171 | 172 | r.raise_for_status() # TODO: return error message instead raise 173 | return xmltodict.parse(r.text) 174 | 175 | 176 | @trace 177 | def get_current_map(actions: Iterable[dict]) -> dict[str, dict[str, dict]]: 178 | result = {'node': {}, 'way': {}, 'relation': {}} 179 | for action in actions: 180 | if action['@type'] == 'create': 181 | element_type, element = next(iter((k, v) for k, v in action.items() if not 
k.startswith('@'))) 182 | else: 183 | element_type, element = next(iter(action['new'].items())) 184 | result[element_type][element['@id']] = element 185 | return result 186 | 187 | 188 | # TODO: include default actions 189 | def parse_action(action: dict) -> tuple[str, dict | None, dict]: 190 | if action['@type'] == 'create': 191 | element_old = None 192 | element_type, element_new = next((k, v) for k, v in action.items() if not k.startswith('@')) 193 | elif action['@type'] in {'modify', 'delete'}: 194 | element_type, element_old = next(iter(action['old'].items())) 195 | element_new = next(iter(action['new'].values())) 196 | else: 197 | raise NotImplementedError(f'Unknown action type: {action["@type"]}') 198 | return element_type, element_old, element_new 199 | 200 | 201 | def ensure_visible_tag(element: dict | None) -> None: 202 | if not element: 203 | return 204 | if '@visible' not in element: 205 | element['@visible'] = 'true' 206 | 207 | 208 | class Overpass: 209 | def __init__(self): 210 | self._https = tuple(get_http_client(url) for url in OVERPASS_URLS) 211 | 212 | async def get_changeset_elements_history( 213 | self, 214 | changeset: dict, 215 | steps: int, 216 | query_filter: str, 217 | ) -> dict[str, list[DiffEntry]] | None: 218 | errors = [] 219 | 220 | for http in self._https: 221 | if errors: 222 | context_print(f'[2/{steps}] Retrying …') 223 | 224 | result = await self._get_changeset_elements_history(http, changeset, steps, query_filter) 225 | 226 | if isinstance(result, dict): # everything ok 227 | return result 228 | 229 | errors.append(result) 230 | 231 | # all errors are the same 232 | if all(errors[0] == e for e in errors[1:]): 233 | context_print(f'{errors[0]} (x{len(errors)})') 234 | else: 235 | context_print('❗️ Multiple errors occurred:') 236 | for i, error in enumerate(errors): 237 | context_print(f'[{i + 1}/{len(errors)}]: {error}') 238 | 239 | return None 240 | 241 | @trace 242 | async def _get_changeset_elements_history( 243 | self, 244 | http: AsyncClient, 245 | changeset: dict, 246 | steps: int, 247 | query_filter: str, 248 | ) -> dict[str, list[DiffEntry]] | str: 249 | bbox = get_bbox(changeset) 250 | changeset_id = changeset['osm']['changeset']['@id'] 251 | changeset_edits = [] 252 | current_action = [] 253 | 254 | for i, (timestamp, element_ids) in enumerate(sorted(changeset['partition'].items(), key=lambda t: t[0])): 255 | partition_adiff = get_changeset_adiff(timestamp) 256 | current_adiff = get_current_adiff(timestamp) 257 | query_unfiltered = build_query_filtered(element_ids, '') 258 | 259 | partition_query = f'[timeout:180]{bbox}{partition_adiff};{query_unfiltered}' 260 | partition_diff = await fetch_overpass(http, partition_query) 261 | 262 | if isinstance(partition_diff, str): 263 | return partition_diff 264 | 265 | partition_action = ensure_iterable(partition_diff['osm'].get('action', ())) 266 | 267 | if parse_timestamp(partition_diff['osm']['meta']['@osm_base']) <= parse_timestamp(timestamp): 268 | return '🕒️ Overpass is updating, please try again shortly' 269 | 270 | partition_size = len(partition_action) 271 | query_size = sum(len(v) for v in element_ids.values()) 272 | 273 | if partition_size != query_size: 274 | return f'❓️ Overpass data is incomplete: {partition_size} != {query_size}' 275 | 276 | if query_filter: 277 | query_filtered = build_query_filtered(element_ids, query_filter) 278 | 279 | filtered_query = f'[timeout:180]{bbox}{partition_adiff};{query_filtered}' 280 | filtered_diff = await fetch_overpass(http, filtered_query, 
check_bad_request=True) 281 | 282 | if isinstance(filtered_diff, str): 283 | return filtered_diff 284 | 285 | filtered_action = ensure_iterable(filtered_diff['osm'].get('action', ())) 286 | 287 | dedup_node_ids = set() 288 | data_map = {'node': {}, 'way': {}, 'relation': {}} 289 | 290 | for a in partition_action: 291 | t, o, n = parse_action(a) 292 | data_map[t][n['@id']] = (o, n) 293 | 294 | for action in filtered_action: 295 | element_type, element_old, element_new = parse_action(action) 296 | 297 | # cleanup extra nodes 298 | if element_type == 'node': 299 | # nodes of filtered query elements are often unrelated (skeleton) 300 | if element_new['@changeset'] != changeset_id: 301 | continue 302 | 303 | # the output may contain duplicate nodes due to double out …; 304 | if element_new['@id'] in dedup_node_ids: 305 | continue 306 | 307 | dedup_node_ids.add(element_new['@id']) 308 | 309 | # merge data 310 | old_new_t = data_map[element_type].get(element_new['@id'], None) 311 | 312 | if old_new_t is None: 313 | return '❓️ Overpass data is incomplete (missing_merge)' 314 | 315 | if old_new_t[1]['@version'] != element_new['@version']: 316 | return '❓️ Overpass data is incomplete (bad_merge_version)' 317 | 318 | changeset_edits.append((element_type, *old_new_t)) 319 | 320 | else: 321 | changeset_edits.extend(parse_action(a) for a in partition_action) 322 | 323 | current_query = f'[timeout:180]{bbox}{current_adiff};{query_unfiltered}' 324 | current_diff = await fetch_overpass(http, current_query) 325 | 326 | if isinstance(current_diff, str): 327 | return current_diff 328 | 329 | current_partition_action = ensure_iterable(current_diff['osm'].get('action', ())) 330 | current_action.extend(current_partition_action) 331 | 332 | context_print(f'[{i + 2}/{steps}] Partition #{i + 1}: OK') 333 | 334 | current_map = get_current_map(current_action) 335 | 336 | result: dict[str, list[DiffEntry]] = {'node': [], 'way': [], 'relation': []} 337 | 338 | for element_type, element_old, element_new in changeset_edits: 339 | # TODO: skip checks by time 340 | # NOTE: this may happen legitimately when there are multiple changesets at the same time 341 | # if element_new['@changeset'] != changeset_id: 342 | # return '❓ Overpass data is corrupted (bad_changeset)' 343 | 344 | # NOTE: this may happen legitimately when there are multiple changesets at the same time 345 | # if element_old and int(element_new['@version']) - int(element_old['@version']) != 1: 346 | # return '❓ Overpass data is corrupted (bad_version)' 347 | 348 | # NOTE: this may happen legitimately when there are multiple changesets at the same time 349 | # if not element_old and int(element_new['@version']) == 2 and not REVERT_TO_DATE: 350 | # return '❓ Overpass data is corrupted (impossible_create)' 351 | 352 | timestamp = parse_timestamp(element_new['@timestamp']) 353 | element_id = element_new['@id'] 354 | element_current = current_map[element_type].get(element_id, element_new) 355 | 356 | ensure_visible_tag(element_old) 357 | ensure_visible_tag(element_new) 358 | ensure_visible_tag(element_current) 359 | 360 | result[element_type].append(DiffEntry(timestamp, element_id, element_old, element_new, element_current)) 361 | 362 | return result 363 | 364 | @trace 365 | async def update_parents(self, invert: dict[str, list], fix_parents: bool) -> int: 366 | internal_ids = { 367 | 'node': {e['@id'] for e in invert['node']}, 368 | 'way': {e['@id'] for e in invert['way']}, 369 | 'relation': {e['@id'] for e in invert['relation']}, 370 | } 371 | counter = 0 372 | 
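# Deletions can cascade: removing a node may leave a parent way with fewer than two usable nodes
# (which is then deleted as well), and removing a node or way may empty a parent relation, which
# can in turn affect its own parents. The loop below re-resolves parents of newly deleted elements
# until nothing changes, bounded to 10 passes as a safety limit.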
373 | for _ in range(10): 374 | deleting_ids = { 375 | 'node': {e['@id'] for e in invert['node'] if e['@visible'] == 'false'}, 376 | 'way': {e['@id'] for e in invert['way'] if e['@visible'] == 'false'}, 377 | 'relation': {e['@id'] for e in invert['relation'] if e['@visible'] == 'false'}, 378 | } 379 | 380 | if not any(ids for ids in deleting_ids.values()): 381 | return counter 382 | 383 | # TODO: optimize bbox by merging previous bboxes 384 | # TODO: optimize processing by not processing the same deleted ids multiple times 385 | query_by_ids = build_query_parents_by_ids(deleting_ids) 386 | 387 | parents_query = f'[timeout:180];{query_by_ids}' 388 | data = await fetch_overpass(self._https[0], parents_query) 389 | 390 | if isinstance(data, str): 391 | return data 392 | 393 | invert_map = { 394 | 'node': {e['@id']: e for e in invert['node']}, 395 | 'way': {e['@id']: e for e in invert['way']}, 396 | 'relation': {e['@id']: e for e in invert['relation']}, 397 | } 398 | 399 | parents = { 400 | 'node': ensure_iterable(data['osm'].get('node', ())), 401 | 'way': ensure_iterable(data['osm'].get('way', ())), 402 | 'relation': ensure_iterable(data['osm'].get('relation', ())), 403 | } 404 | 405 | changed = False 406 | 407 | for element_type, elements in parents.items(): 408 | for element in elements: 409 | element: dict 410 | 411 | # skip internal elements when not fixing parents 412 | if not fix_parents and element['@id'] in internal_ids[element_type]: 413 | continue 414 | 415 | # use current element if present 416 | element = deepcopy(invert_map[element_type].get(element['@id'], element)) 417 | 418 | # TODO: ensure default element tags 419 | # skip if parent is already deleted 420 | if element.get('@visible', 'true') == 'false': 421 | continue 422 | 423 | deleting_child_ids = {'node': set(), 'way': set(), 'relation': set()} 424 | 425 | if element_type == 'way': 426 | element['nd'] = ensure_iterable(element.get('nd', ())) 427 | new_nds = [] 428 | 429 | for nd in element['nd']: 430 | if nd['@ref'] in deleting_ids['node']: 431 | deleting_child_ids['node'].add(nd['@ref']) 432 | else: 433 | new_nds.append(nd) 434 | 435 | element['nd'] = new_nds 436 | 437 | # delete single node ways 438 | if len(element['nd']) == 1: 439 | element['nd'] = () 440 | 441 | if not element['nd']: 442 | element['@visible'] = 'false' 443 | 444 | elif element_type == 'relation': 445 | element['member'] = ensure_iterable(element.get('member', ())) 446 | new_members = [] 447 | 448 | for m in element['member']: 449 | if m['@ref'] in deleting_ids[m['@type']]: 450 | deleting_child_ids[m['@type']].add(m['@ref']) 451 | else: 452 | new_members.append(m) 453 | 454 | element['member'] = new_members 455 | 456 | if not element['member']: 457 | element['@visible'] = 'false' 458 | 459 | else: 460 | raise NotImplementedError(f'Unknown element type: {element_type}') 461 | 462 | # skip if nothing changed 463 | if not any(ids for ids in deleting_child_ids.values()): 464 | continue 465 | 466 | changed = True 467 | 468 | if fix_parents: 469 | ensure_visible_tag(element) 470 | 471 | if element['@id'] in invert_map[element_type]: 472 | idx = next(i for i, v in enumerate(invert[element_type]) if v['@id'] == element['@id']) 473 | invert[element_type][idx] = element 474 | else: 475 | invert[element_type].append(element) 476 | counter += 1 477 | 478 | else: 479 | for key, ids in deleting_child_ids.items(): 480 | invert_key_idxs = [] 481 | 482 | for id_ in ids: 483 | idx = next((i for i, v in enumerate(invert[key]) if v['@id'] == id_), None) 484 | if idx is 
not None: 485 | invert_key_idxs.append(idx) 486 | internal_ids[key].remove(id_) 487 | counter += 1 488 | 489 | if not invert_key_idxs: 490 | continue 491 | 492 | invert_key_idxs.sort() 493 | 494 | invert[key] = list( 495 | chain( 496 | invert[key][: invert_key_idxs[0]], 497 | *(invert[key][left + 1 : right] for left, right in pairwise(invert_key_idxs)), 498 | invert[key][invert_key_idxs[-1] + 1 :], 499 | ) 500 | ) 501 | 502 | if not changed: 503 | return counter 504 | 505 | raise RecursionError('Parents recursion limit reached') 506 | -------------------------------------------------------------------------------- /osm_revert/utils.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import random 4 | import ssl 5 | import time 6 | from collections.abc import Iterable 7 | from functools import wraps 8 | 9 | from httpx import AsyncClient 10 | 11 | from osm_revert.config import USER_AGENT 12 | from osm_revert.context_logger import context_print 13 | 14 | _SSL_CONTEXT = ssl.create_default_context(cafile=os.environ['SSL_CERT_FILE']) 15 | 16 | 17 | def get_http_client(base_url: str, *, headers: dict | None = None) -> AsyncClient: 18 | if headers is None: 19 | headers = {} 20 | return AsyncClient( 21 | base_url=base_url, 22 | follow_redirects=True, 23 | timeout=30, 24 | headers={'User-Agent': USER_AGENT, **headers}, 25 | verify=_SSL_CONTEXT, 26 | ) 27 | 28 | 29 | def retry_exponential(func): 30 | timeout = 10 31 | start = 1 32 | 33 | @wraps(func) 34 | async def wrapper(*args, **kwargs): 35 | ts = time.perf_counter() 36 | sleep = start 37 | 38 | while True: 39 | try: 40 | return await func(*args, **kwargs) 41 | except Exception as e: 42 | if (time.perf_counter() + sleep) - ts > timeout: 43 | context_print(f'[⛔] {func.__name__} failed') 44 | raise e 45 | await asyncio.sleep(sleep) 46 | sleep = min(sleep * (1 + random.random()), 1800) # max 30 minutes # noqa: S311 47 | 48 | return wrapper 49 | 50 | 51 | def ensure_iterable(item) -> list | tuple: 52 | if item is None: 53 | return () 54 | if isinstance(item, list | tuple): 55 | return item 56 | return (item,) 57 | 58 | 59 | def is_osm_moderator(roles: Iterable[str]) -> bool: 60 | return bool({'moderator', 'administrator'}.intersection(roles)) 61 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | dependencies = [ 3 | "cachetools", 4 | "fastapi", 5 | "githead", 6 | "gunicorn", 7 | "httpx[brotli,zstd]", 8 | "jinja2", 9 | "sentry-sdk[fastapi,httpx,pure_eval]", 10 | "uvicorn[standard]", 11 | "xmltodict", 12 | ] 13 | name = "osm-revert" 14 | requires-python = "~=3.13" 15 | version = "0.0.0" 16 | 17 | [tool.uv] 18 | package = false 19 | python-downloads = "never" 20 | python-preference = "only-system" 21 | 22 | [tool.setuptools] 23 | packages = ["osm_revert", "web"] 24 | 25 | [tool.ruff] 26 | extend-exclude = ["osm_revert/diff_match_patch.py"] 27 | indent-width = 4 28 | line-length = 120 29 | target-version = "py313" 30 | 31 | [tool.ruff.lint] 32 | ignore = [] 33 | # see https://docs.astral.sh/ruff/rules/ for rules documentation 34 | select = [ 35 | "A", # flake8-builtins 36 | "ARG", # flake8-unused-arguments 37 | "ASYNC", # flake8-async 38 | "B", # flake8-bugbear 39 | "C4", # flake8-comprehensions 40 | "DTZ", # flake8-datetimez 41 | "E4", # pycodestyle 42 | "E7", 43 | "E9", 44 | "F", # pyflakes 45 | "FA", # flake8-future-annotations 46 | 
"FLY", # flynt # "FURB", # refurb 47 | "G", # flake8-logging-format 48 | "I", # isort 49 | "ICN", # flake8-import-conventions 50 | "INT", # flake8-gettext 51 | "ISC", # flake8-implicit-str-concat 52 | "LOG", # flake8-logging 53 | "N", # pep8-naming 54 | "NPY", # numpy 55 | "PERF", # perflint 56 | "PGH", # pygrep-hooks 57 | "PIE", # flake8-pie 58 | "PT", # flake8-pytest-style 59 | "PYI", # flake8-pyi 60 | "Q", # flake8-quotes 61 | "RSE", # flake8-raise 62 | "RUF", # ruff 63 | "S", # flake8-bandit 64 | "SIM", # flake8-simplify 65 | "SLF", # flake8-self 66 | "SLOT", # flake8-slots 67 | "T10", # flake8-debugger 68 | "T20", # flake8-print 69 | "TID", # flake8-tidy-imports 70 | "UP", # pyupgrade 71 | "W6", 72 | "YTT", # flake8-2020 73 | ] 74 | fixable = ["ALL"] 75 | unfixable = [] 76 | 77 | [tool.ruff.format] 78 | indent-style = "space" 79 | line-ending = "lf" 80 | quote-style = "single" 81 | skip-magic-trailing-comma = false 82 | 83 | [tool.ruff.lint.flake8-builtins] 84 | builtins-ignorelist = ["id", "open", "type"] 85 | 86 | [tool.ruff.lint.flake8-quotes] 87 | docstring-quotes = "double" 88 | inline-quotes = "single" 89 | multiline-quotes = "double" 90 | 91 | [tool.ruff.lint.pylint] 92 | max-args = 10 93 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | {}: 2 | 3 | let 4 | # Update packages with `nixpkgs-update` command 5 | pkgs = import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/59138c7667b7970d205d6a05a8bfa2d78caa3643.tar.gz") { }; 6 | 7 | pythonLibs = with pkgs; [ 8 | stdenv.cc.cc.lib 9 | ]; 10 | python' = with pkgs; (symlinkJoin { 11 | name = "python"; 12 | paths = [ python313 ]; 13 | buildInputs = [ makeWrapper ]; 14 | postBuild = '' 15 | wrapProgram "$out/bin/python3.13" --prefix LD_LIBRARY_PATH : "${lib.makeLibraryPath pythonLibs}" 16 | ''; 17 | }); 18 | 19 | packages' = with pkgs; [ 20 | python' 21 | esbuild 22 | uv 23 | ruff 24 | 25 | (writeShellScriptBin "run" '' 26 | python -m gunicorn web.main:app \ 27 | --worker-class uvicorn.workers.UvicornWorker \ 28 | --graceful-timeout 5 \ 29 | --keep-alive 300 \ 30 | --access-logfile - 31 | '') 32 | (writeShellScriptBin "make-bundle" '' 33 | # authorized.js 34 | HASH=$(esbuild web/static/js/authorized.js --bundle --minify | sha256sum | head -c8 ; echo "") && \ 35 | esbuild web/static/js/authorized.js --bundle --minify --sourcemap --charset=utf8 --outfile=web/static/js/authorized.$HASH.js && \ 36 | find web/templates -type f -exec sed -r 's|src="/static/js/authorized\..*?js"|src="/static/js/authorized.'$HASH'.js"|g' -i {} \; 37 | 38 | # style.css 39 | HASH=$(esbuild web/static/css/style.css --bundle --minify | sha256sum | head -c8 ; echo "") && \ 40 | esbuild web/static/css/style.css --bundle --minify --sourcemap --charset=utf8 --outfile=web/static/css/style.$HASH.css && \ 41 | find web/templates -type f -exec sed -r 's|href="/static/css/style\..*?css"|href="/static/css/style.'$HASH'.css"|g' -i {} \; 42 | '') 43 | (writeShellScriptBin "nixpkgs-update" '' 44 | set -e 45 | hash=$( 46 | curl --silent --location \ 47 | https://prometheus.nixos.org/api/v1/query \ 48 | -d "query=channel_revision{channel=\"nixpkgs-unstable\"}" | \ 49 | grep --only-matching --extended-regexp "[0-9a-f]{40}") 50 | sed -i -E "s|/nixpkgs/archive/[0-9a-f]{40}\.tar\.gz|/nixpkgs/archive/$hash.tar.gz|" shell.nix 51 | echo "Nixpkgs updated to $hash" 52 | '') 53 | ]; 54 | 55 | shell' = with pkgs; '' 56 | export TZ=UTC 57 | export 
NIX_ENFORCE_NO_NATIVE=0 58 | export NIX_SSL_CERT_FILE=${cacert}/etc/ssl/certs/ca-bundle.crt 59 | export SSL_CERT_FILE=$NIX_SSL_CERT_FILE 60 | export PYTHONNOUSERSITE=1 61 | export PYTHONPATH="" 62 | 63 | current_python=$(readlink -e .venv/bin/python || echo "") 64 | current_python=''${current_python%/bin/*} 65 | [ "$current_python" != "${python'}" ] && rm -rf .venv/ 66 | 67 | echo "Installing Python dependencies" 68 | export UV_PYTHON="${python'}/bin/python" 69 | uv sync --frozen 70 | 71 | echo "Activating Python virtual environment" 72 | source .venv/bin/activate 73 | 74 | if [ -f .env ]; then 75 | echo "Loading .env file" 76 | set -o allexport 77 | source .env set 78 | set +o allexport 79 | else 80 | echo "Skipped loading .env file (not found)" 81 | fi 82 | ''; 83 | in 84 | pkgs.mkShell { 85 | buildInputs = packages'; 86 | shellHook = shell'; 87 | } 88 | -------------------------------------------------------------------------------- /web/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from asyncio import Queue, QueueShutDown, Semaphore, TaskGroup, timeout 4 | from collections import defaultdict 5 | from contextlib import suppress 6 | from functools import lru_cache 7 | from hashlib import sha256 8 | from typing import Annotated, NewType 9 | from urllib.parse import urlencode 10 | 11 | from cachetools import TTLCache 12 | from fastapi import FastAPI, HTTPException, Query, Request, WebSocketDisconnect, status 13 | from fastapi.responses import RedirectResponse 14 | from fastapi.staticfiles import StaticFiles 15 | from fastapi.templating import Jinja2Templates 16 | from httpx import AsyncClient 17 | from pydantic import BaseModel, SecretStr 18 | from sentry_sdk import capture_exception, start_transaction, trace 19 | from starlette.websockets import WebSocket 20 | 21 | from osm_revert.config import ( 22 | CONNECTION_LIMIT, 23 | OSM_API_URL, 24 | OSM_CLIENT, 25 | OSM_SCOPES, 26 | OSM_SECRET, 27 | OSM_URL, 28 | TEST_ENV, 29 | USER_AGENT, 30 | ) 31 | from osm_revert.context_logger import context_logger 32 | from osm_revert.main import main as revert_main 33 | 34 | HashedAccessToken = NewType('HashedAccessToken', bytes) 35 | 36 | _RE_CHANGESET_SEPARATOR = re.compile(r'(?:;|,|\s)+') 37 | _RE_REPEATED_WHITESPACE = re.compile(r'\s{2,}') 38 | 39 | _HTTP = AsyncClient( 40 | headers={'User-Agent': USER_AGENT}, 41 | timeout=15, 42 | follow_redirects=True, 43 | ) 44 | 45 | _SESSION_MAX_AGE = 31536000 # 1 year 46 | _TEMPLATES = Jinja2Templates(directory='web/templates', auto_reload=TEST_ENV) 47 | _USER_CACHE: TTLCache[HashedAccessToken, dict] = TTLCache(maxsize=1024, ttl=7200) # 2 hours 48 | _ACTIVE_WS: defaultdict[HashedAccessToken, Semaphore] = defaultdict(lambda: Semaphore(CONNECTION_LIMIT)) 49 | 50 | app = FastAPI() 51 | app.mount('/static', StaticFiles(directory='web/static', html=True), name='static') 52 | 53 | 54 | @app.get('/') 55 | @app.post('/') 56 | async def index(request: Request): 57 | if user := await _fetch_user_details(request): 58 | return _TEMPLATES.TemplateResponse(request, 'authorized.jinja2', {'user': user}) 59 | else: 60 | return _TEMPLATES.TemplateResponse(request, 'index.jinja2') 61 | 62 | 63 | @app.post('/login') 64 | async def login(request: Request): 65 | state = os.urandom(32).hex() 66 | authorization_url = f'{OSM_URL}/oauth2/authorize?' 
+ urlencode( 67 | { 68 | 'client_id': OSM_CLIENT, 69 | 'redirect_uri': str(request.url_for('callback')), 70 | 'response_type': 'code', 71 | 'scope': OSM_SCOPES, 72 | 'state': state, 73 | } 74 | ) 75 | response = RedirectResponse(authorization_url, status.HTTP_303_SEE_OTHER) 76 | response.set_cookie('oauth_state', state, secure=not TEST_ENV, httponly=True) 77 | return response 78 | 79 | 80 | @app.get('/callback') 81 | async def callback(request: Request, code: Annotated[str, Query()], state: Annotated[str, Query()]): 82 | cookie_state = request.cookies.get('oauth_state') 83 | if cookie_state != state: 84 | raise HTTPException(status.HTTP_400_BAD_REQUEST, 'Invalid OAuth state') 85 | 86 | r = await _HTTP.post( 87 | f'{OSM_URL}/oauth2/token', 88 | data={ 89 | 'client_id': OSM_CLIENT, 90 | 'client_secret': OSM_SECRET.get_secret_value(), 91 | 'redirect_uri': str(request.url_for('callback')), 92 | 'grant_type': 'authorization_code', 93 | 'code': code, 94 | }, 95 | ) 96 | r.raise_for_status() 97 | access_token = r.json()['access_token'] 98 | 99 | response = RedirectResponse('/', status.HTTP_302_FOUND) 100 | response.set_cookie('access_token', access_token, _SESSION_MAX_AGE, secure=not TEST_ENV, httponly=True) 101 | return response 102 | 103 | 104 | @app.post('/logout') 105 | async def logout(): 106 | response = RedirectResponse('/', status.HTTP_302_FOUND) 107 | response.delete_cookie('access_token') 108 | return response 109 | 110 | 111 | @app.websocket('/ws') 112 | async def websocket(ws: WebSocket): 113 | await ws.accept() 114 | 115 | try: 116 | access_token = SecretStr(ws.cookies['access_token']) 117 | except KeyError: 118 | await ws.close(1008) 119 | return 120 | 121 | semaphore = _ACTIVE_WS[_hash_access_token(access_token)] 122 | if semaphore.locked(): 123 | await ws.close(1008, 'Too many simultaneous connections for this user') 124 | return 125 | 126 | async with semaphore: 127 | try: 128 | while True: 129 | args = MainArgs(**(await ws.receive_json())) 130 | with start_transaction(op='websocket.server', name='revert'): 131 | last_message = await main(ws, access_token, args) 132 | await ws.send_json({'message': last_message, 'last': True}) 133 | except* WebSocketDisconnect: 134 | pass 135 | except* Exception as e: 136 | capture_exception(e) 137 | await ws.close(1011, str(e)) 138 | 139 | 140 | class MainArgs(BaseModel): 141 | changesets: str 142 | query_filter: str 143 | comment: str 144 | upload: bool 145 | discussion: str 146 | discussion_target: str 147 | fix_parents: bool 148 | 149 | 150 | @trace 151 | async def main(ws: WebSocket, access_token: SecretStr, args: MainArgs) -> str: 152 | changesets = _RE_CHANGESET_SEPARATOR.split(args.changesets) 153 | changesets = tuple(c.strip() for c in changesets if c.strip()) 154 | query_filter = args.query_filter.strip() 155 | comment = _RE_REPEATED_WHITESPACE.sub(' ', args.comment).strip() 156 | upload = args.upload 157 | discussion = args.discussion.strip() 158 | discussion_target = args.discussion_target 159 | fix_parents = args.fix_parents 160 | 161 | if not changesets: 162 | return '❗️ No changesets were provided' 163 | if not all(c.isnumeric() for c in changesets): 164 | return '❗️ One or more changesets contain non-numeric characters' 165 | if upload and not comment: 166 | return '❗️ No comment was provided for the changes' 167 | if discussion_target not in {'all', 'newest', 'oldest'}: 168 | return '❗️ Invalid discussion target' 169 | 170 | changeset_ids = tuple(map(int, changesets)) 171 | print_osc = not upload 172 | 173 | async def 
queue_processor(queue: Queue[str]): 174 | with suppress(QueueShutDown): 175 | while True: 176 | await ws.send_json({'message': await queue.get()}) 177 | 178 | async with TaskGroup() as tg: 179 | with context_logger() as queue: 180 | tg.create_task(queue_processor(queue)) 181 | async with timeout(1800): # 30 minutes 182 | exit_code = await revert_main( 183 | changeset_ids=changeset_ids, 184 | comment=comment, 185 | access_token=access_token, 186 | discussion=discussion, 187 | discussion_target=discussion_target, 188 | print_osc=print_osc, 189 | query_filter=query_filter, 190 | fix_parents=fix_parents, 191 | ) 192 | return f'Exit code: {exit_code}' 193 | 194 | 195 | async def _fetch_user_details(request: Request) -> dict | None: 196 | if 'access_token' not in request.cookies: 197 | return None 198 | access_token = SecretStr(request.cookies['access_token']) 199 | hashed_access_token = _hash_access_token(access_token) 200 | cached = _USER_CACHE.get(hashed_access_token) 201 | if cached is not None: 202 | return cached 203 | 204 | r = await _HTTP.get( 205 | f'{OSM_API_URL}/api/0.6/user/details.json', 206 | headers={'Authorization': f'Bearer {access_token.get_secret_value()}'}, 207 | ) 208 | if not r.is_success: 209 | return None 210 | user = r.json() 211 | 212 | if 'img' not in user: 213 | user['img'] = {'href': None} 214 | 215 | _USER_CACHE[hashed_access_token] = user 216 | return user 217 | 218 | 219 | @lru_cache(maxsize=128) 220 | def _hash_access_token(access_token: SecretStr) -> HashedAccessToken: 221 | return HashedAccessToken(sha256(access_token.get_secret_value().encode()).digest()) 222 | -------------------------------------------------------------------------------- /web/static/css/style.css: -------------------------------------------------------------------------------- 1 | abbr { 2 | color: #777; 3 | cursor: help; 4 | vertical-align: super; 5 | font-size: .8em; 6 | } 7 | 8 | button img { 9 | position: relative; 10 | top: -0.05em; 11 | } 12 | 13 | .header img { 14 | position: relative; 15 | top: -0.05em; 16 | } 17 | 18 | .required::after { 19 | content: '*'; 20 | color: #e00; 21 | font-weight: bold; 22 | } 23 | 24 | .char-counter { 25 | display: none; 26 | font-size: .8em; 27 | margin-top: .25em; 28 | text-align: end; 29 | } 30 | -------------------------------------------------------------------------------- /web/static/img/brands/openstreetmap.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/web/static/img/brands/openstreetmap.webp -------------------------------------------------------------------------------- /web/static/img/favicon/256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/web/static/img/favicon/256.png -------------------------------------------------------------------------------- /web/static/img/favicon/256.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/web/static/img/favicon/256.webp -------------------------------------------------------------------------------- /web/static/img/favicon/480.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/web/static/img/favicon/480.png -------------------------------------------------------------------------------- /web/static/img/favicon/480.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Zaczero/osm-revert/1c9ad77a4df6f1ec88b4e122a5b5a1e3badd824e/web/static/img/favicon/480.webp -------------------------------------------------------------------------------- /web/static/js/authorized.js: -------------------------------------------------------------------------------- 1 | const modal = document.getElementById('first-time-modal') 2 | if (!localStorage.getItem('first-time-modal-acknowledged')) { 3 | console.log('Showing first-time-modal') 4 | const backdrop = document.createElement('div') 5 | backdrop.classList.add('modal-backdrop', 'show') 6 | document.body.appendChild(backdrop) 7 | 8 | modal.classList.add('d-block') 9 | const acceptButton = modal.querySelector('button') 10 | acceptButton.addEventListener('click', () => { 11 | console.debug('Acknowledging first-time-modal') 12 | localStorage.setItem('first-time-modal-acknowledged', 'true') 13 | backdrop.remove() 14 | modal.remove() 15 | }, {once: true}) 16 | acceptButton.focus() 17 | } else { 18 | console.log('Not showing first-time-modal') 19 | modal.remove() 20 | } 21 | 22 | const form = document.getElementById('form') 23 | const changesets = document.getElementById('changesets') 24 | const query_filter = document.getElementById('query-filter') 25 | const comment = document.getElementById('comment') 26 | const discussion = document.getElementById('discussion') 27 | const submit = document.getElementById('submit') 28 | const submit_osc = document.getElementById('submit-osc') 29 | const log = document.getElementById('log') 30 | const ws = new WebSocket(`${document.location.protocol === 'https:' ? 
'wss' : 'ws'}://${document.location.host}/ws`) 31 | 32 | let isAutoScrolling = true 33 | let isReverting = true 34 | let clearFields = false 35 | 36 | let wsDownloadingOsc = false 37 | let wsOsc = [] 38 | 39 | const setIsReverting = state => { 40 | if (state) { 41 | wsDownloadingOsc = false 42 | wsOsc = [] 43 | } 44 | 45 | isReverting = state 46 | submit.disabled = state 47 | submit_osc.disabled = state 48 | } 49 | 50 | ws.onopen = () => { 51 | submit.value = 'Revert and upload' 52 | submit_osc.value = '💾 Revert and download .osc' 53 | setIsReverting(false) 54 | } 55 | 56 | ws.onmessage = e => { 57 | const obj = JSON.parse(e.data) 58 | 59 | if (obj.message === '') { 60 | wsDownloadingOsc = true 61 | wsOsc = [] 62 | } else if (obj.message === '') { 63 | const fileName = 'revert_' + new Date().toISOString().replace(/:/g, '_') + '.osc' 64 | const osc = wsOsc.join('\n') 65 | 66 | const a = document.createElement('a') 67 | const file = new Blob([osc], {type: 'text/xml; charset=utf-8'}) 68 | a.href = URL.createObjectURL(file) 69 | a.download = fileName 70 | a.click() 71 | 72 | wsDownloadingOsc = false 73 | wsOsc = [] 74 | } else if (wsDownloadingOsc) { 75 | wsOsc.push(obj.message) 76 | } else { 77 | log.value += obj.message + '\n' 78 | 79 | if (isAutoScrolling && log.scrollHeight > log.clientHeight) 80 | log.scrollTop = log.scrollHeight 81 | } 82 | 83 | if (obj.last === true) { 84 | if (clearFields && obj.message === 'Exit code: 0') { 85 | changesets.value = '' 86 | } 87 | 88 | setIsReverting(false) 89 | } 90 | } 91 | 92 | ws.onclose = e => { 93 | console.log(e) 94 | setIsReverting(true) 95 | submit.value = 'Disconnected' 96 | submit_osc.value = 'Disconnected' 97 | log.value = `⚠️ Disconnected: ${e.reason}\n⚠️ Please reload the page` 98 | } 99 | 100 | const beginRevert = upload => { 101 | if (isReverting) 102 | return 103 | 104 | setIsReverting(true) 105 | clearFields = upload 106 | log.value = '' 107 | 108 | ws.send(JSON.stringify({ 109 | changesets: changesets.value, 110 | query_filter: query_filter.value, 111 | comment: comment.value, 112 | upload: upload, 113 | discussion: discussion.value, 114 | discussion_target: document.querySelector('input[name="discussion_target"]:checked').value, 115 | fix_parents: document.querySelector('input[name="fix_parents"]:checked').value === 'True', 116 | })) 117 | } 118 | 119 | submit.addEventListener('click', e => { 120 | e.preventDefault() 121 | 122 | beginRevert(true) 123 | }) 124 | 125 | submit_osc.addEventListener('click', e => { 126 | e.preventDefault() 127 | 128 | beginRevert(false) 129 | }) 130 | 131 | log.addEventListener('scroll', () => { 132 | isAutoScrolling = log.scrollHeight - log.scrollTop < log.clientHeight + 5 133 | }) 134 | 135 | for (const counter of document.querySelectorAll('.char-counter')) { 136 | const input = document.getElementById(counter.getAttribute('for')) 137 | const maxLength = input.getAttribute('maxlength') 138 | 139 | input.oninput = () => { 140 | const charsLeft = maxLength - [...input.value].length 141 | if (charsLeft <= 100) { 142 | if (charsLeft <= 0) 143 | counter.textContent = `No characters left` 144 | else 145 | counter.textContent = `${charsLeft} character${charsLeft !== 1 ? 's' : ''} left` 146 | 147 | counter.style.color = charsLeft <= 20 ? 
'red' : 'initial' 148 | counter.style.display = 'block' 149 | } else { 150 | counter.style.display = 'none' 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /web/templates/_base.jinja2: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | osm-revert 8 | 9 | {% block head %}{% endblock %} 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 41 | 42 |
43 |
44 | {% block body %}{% endblock %} 45 |
46 |
47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /web/templates/authorized.jinja2: -------------------------------------------------------------------------------- 1 | {% extends '_base.jinja2' %} 2 | {% block body %} 3 | {% set changesets = request.query_params.get('changesets', '') %} 4 | {% set query_filter = request.query_params.get('query-filter', '') %} 5 | 6 |
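{# The two `set` statements above read optional query parameters, so a revert can be
   pre-filled by linking to e.g. /?changesets=123456&query-filter=... while signed in
   (the changeset id here is only an illustrative value). #}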
7 |

8 | osm-revert logo 9 | osm-revert 10 |

11 | 12 |
13 | {% if user.img.href %} 14 | Profile picture 15 | {% endif %} 16 | 17 |
{{ user.display_name }}
18 | 19 |
20 | 21 |
22 |
23 |
24 | 25 | 30 | 31 | 39 | 40 | 47 | 48 | 55 | 56 |
57 |
58 | 59 | 60 |
61 |
62 | 63 | 64 |
65 |
66 | 67 | 68 |
69 |
70 | 71 |
72 | Resolve parent conflicts: 73 | (?) 74 |
75 |
76 | 77 | 80 |
81 |
82 | 83 | 86 |
87 |
88 |
89 | 90 |
91 |
92 | 93 |
94 |
95 | 96 |
97 |
98 | 99 | 103 | 104 | 132 | 133 | 134 | {% endblock %} 135 | -------------------------------------------------------------------------------- /web/templates/index.jinja2: -------------------------------------------------------------------------------- 1 | {% extends '_base.jinja2' %} 2 | {% block head %} 3 | 7 | 8 | {% endblock %} 9 | {% block body %} 10 | 11 |
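{# Landing page served by web/main.py when the request carries no valid access_token
   cookie; signing in goes through POST /login, which redirects to the OpenStreetMap
   OAuth2 authorize page and returns through /callback to set the session cookie. #}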

12 | osm-revert logo 13 | osm-revert 14 |

15 | 16 |
    17 |
  • 18 | Faster
    19 | This reverter uses 20 | Overpass 21 | to reduce the number of API calls. 22 |
  • 23 |
  • 24 | Smarter
    25 | This reverter uses 26 | Google's diff-match-patch 27 | library to automatically resolve conflicts. 28 |
  • 29 |
  • 30 | Limitless
    31 | This reverter has no arbitrary limits on the changeset size. 32 |
  • 33 |
34 | 35 |
36 | 40 |
41 | {% endblock %} 42 | --------------------------------------------------------------------------------
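For reference, the browser client in web/static/js/authorized.js starts a revert by sending a single JSON object over the /ws websocket, with fields mirroring the MainArgs model in web/main.py. Below is a minimal sketch of such a payload; the changeset ids and comment are made-up values, and the reply shape is summarized in the trailing comments.

import json

# Hypothetical /ws payload; field names follow MainArgs in web/main.py,
# the values below are illustrative only.
revert_request = {
    'changesets': '123456789, 123456790',     # ids split on commas, semicolons or whitespace
    'query_filter': '',                       # optional Overpass filter, may be left empty
    'comment': 'Reverting accidental edits',  # required whenever upload is true
    'upload': True,                           # False makes the server stream an .osc download instead
    'discussion': '',                         # optional changeset discussion comment
    'discussion_target': 'all',               # must be one of: all, newest, oldest
    'fix_parents': True,
}

print(json.dumps(revert_request))
# The server streams {'message': ...} objects back and finishes with
# {'message': 'Exit code: <n>', 'last': true}; the frontend only clears the
# changesets field when that final message is exactly 'Exit code: 0'.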