├── .anylint ├── .codacy.yml ├── .codebeatignore ├── .coveragerc ├── .gitattributes ├── .gitignore ├── .isort.cfg ├── .readthedocs.yml ├── .travis.yml ├── .whitesource ├── LICENSE ├── MANIFEST.in ├── README.rst ├── codecov.yml ├── docker └── MongoDB │ ├── docker-compose.yml │ ├── mongo-express.env │ ├── mongo-initdb.d │ └── createUser.js │ └── mongo.env ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── conf.py │ ├── index.rst │ ├── intro │ └── installation.rst │ ├── items.rst │ ├── pipelines │ ├── ItemPipeline.rst │ └── MongoDB.rst │ ├── settings.rst │ └── signals.rst ├── mypy.ini ├── pylintrc ├── pyproject.toml ├── pytest.ini ├── renovate.json ├── requirements.txt ├── scrapy_pipelines ├── __init__.py ├── _version.py ├── items.py ├── pipelines │ ├── __init__.py │ └── mongo.py ├── settings │ ├── __init__.py │ └── default_settings.py └── signals.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── requirements.txt ├── test_pipelines_mongo.py └── test_settings.py ├── tox.ini └── versioneer.py /.anylint: -------------------------------------------------------------------------------- 1 | { 2 | "ignore":[ 3 | "scrapy_pipelines/_version.py", 4 | "versioneer.py" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /.codacy.yml: -------------------------------------------------------------------------------- 1 | exclude_paths: 2 | - '.github/**' 3 | - 'scrapy_pipelines/_version.py' 4 | - 'tests/**' 5 | - 'versioneer.py' 6 | -------------------------------------------------------------------------------- /.codebeatignore: -------------------------------------------------------------------------------- 1 | scrapy_pipelines/_version.py 2 | versioneer.py 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | scrapy_pipelines/_version.py 
-------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | scrapy_pipelines/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 
98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | docker/MongoDB/mongo-db/ 107 | docker/MongoDB/mongo-home/ 108 | 109 | Pipfile.lock 110 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | skip=scrapy_pipelines/_version.py 3 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | # Build documentation with MkDocs 13 | #mkdocs: 14 | # configuration: mkdocs.yml 15 | 16 | # Optionally build your docs in additional formats such as PDF and ePub 17 | formats: all 18 | 19 | # Optionally set the version of Python and requirements required to build your docs 20 | python: 21 | version: 3.7 22 | install: 23 | - requirements: docs/requirements.txt 24 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | services: 4 | - mongodb 5 | 6 | sudo: false 7 | 8 | branches: 9 | only: 10 | - master 11 | - "/^v\\d+\\.\\d+$/" 12 | - "/^\\d\\.\\d+$/" 13 | - "/^\\d\\.\\d+\\.\\d+(rc\\d+|\\.dev\\d+)?$/" 14 | 15 | matrix: 16 | include: 17 | - env: TOXENV=py36 18 | python: 3.6 19 | - dist: xenial 20 | env: TOXENV=py37 21 | python: 3.7 22 | - env: TOXENV=docs 23 | python: 3.6 24 | - env: TOXENV=docs-coverage 25 | python: 3.6 26 | - env: TOXENV=docs-links 27 | python: 3.6 28 | 29 | install: 30 | - pip install -U tox twine 
wheel codecov 31 | 32 | before_script: 33 | - mongo test_db --eval 'db.createUser({user:"test_username",pwd:"test_password",roles:["readWrite"]});' 34 | 35 | script: tox 36 | 37 | after_success: 38 | - codecov 39 | 40 | cache: 41 | directories: 42 | - "$HOME/.cache/pip" 43 | 44 | notifications: 45 | slack: 46 | secure: n6gYbtGfOf3AlRKvwSpAgH7t55oJn5FN8BBMiLUpzUz9lto1GBF2c4l2myLjrKK7HmHjJ3lqVIovKBWkx8DT4lygMb6/QvTOnpudQUNan9qk1O2p87wenlKAraE/UAeku+iBTbLdSqo2NoFWOtx7BKgPXDCZpbDJ69Bmj5w3q73ksgTYDkydQdyR0RsnL6Q0LoaFNOmgTphje8uk5u5fM+R2M3Yi3hJmGF69vi5qki8kRoBkah00N8VohNOZ2Sr78cxv09MxkNnDizOhAEGqmyWZQaa8GXTPZYtr8f7CS6KE2UYhgRULdGBDr1LT5LhpP1dY2FD8Wb138yYMQ3hclLvrFiJEpKc3WZ/wAvWnKD/dhfS/6W4soxf1biDiNZSp0ROdqV2vJsqqM4XxSZrL8TzZCu2hyIDPw9DlVMpFW4v88c1F6wa7Ug1MDiVHPgK0d044ccRGFg5KXnsS6TpajmLsNZmxxSZvy3n1dla3heaP1oEb0n+TWl+cctrfh7Rw4iNJF/i5DkSabn14IQ8b4NYd2xWIeQoEtqjyZPXHEGRbz7xJ44f0AUo00ptugRr5/BuKTyxKtwvsE6HEx+3w2HTJ8q6h6xc71Dyih67Ga6n9X7gcUY6UEqCdlAkvjCFIro4jqScTbDadLXvN7aiaev/9lmcfSkM2GBYuVhlyhlw= 47 | 48 | deploy: 49 | provider: pypi 50 | distributions: sdist bdist_wheel 51 | user: scrapedia 52 | password: 53 | secure: Cic+TcpBqYRKXz/GVXA3EaCIu0uy+OXu8Jqi0EdGLDBB+2iUvKEjpSvMT8z2LZH6xZMwI6YwZRSGFdgdZFGF/Y/FxL8FFePCi5jqre9iiwGMIoJLCzLzONHkiz0IBsKtU3uJDtKBU1/NG7Nl7R2kT2HK97vKzt8q6VJpNdPkZLNeT2rpa4Fd9czfaxZ8GEHZ/rxYAVN8eltOGkbD2lh5+cfGxU1EvPJLnMLXGCIefw/+uS5+bt5urGKwF68PoeUEEdu9CcaH5bfo8XsOWqPxBcpyrf7UAufzYVOdTdr2FrBnxj1Xl2sz3ENXw7eRwoh5EoC30QaC3O06NgIm3P38IFSzNxhIvtNrFxCMWI49Wttljx9IuTwscDa1Jg/JlPb1QOHycBtrRgqSHR5MDzBmWdJB4w68S6Igq5rlQT4f0urrrVdLVuJqfkt6I2A22KJOl7rCLXN6Yn2ida4pZk/QBVlHaCEy3YfVx9yIcUjCVcq0DVzETll94zWH0JVRMRv5jEJvoscYNlex/ra/dSoEff3lpqjUH6J94R14FkjrZU+kC+h9vZkqcK8MUZSaDxghiKxOm57ons0gro7KIDHx1KZulQp2QUBUDpSZD0EdOGf6CGGauecf3cMza1WORMJjjeCrkCZA1LsbKnngcSItOUL43+8va/AEPSB4XImQFzU= 54 | on: 55 | branch: master 56 | condition: "$TOXENV == py36" 57 | repo: scrapedia/scrapy-pipelines 58 | tags: true 59 | 
-------------------------------------------------------------------------------- /.whitesource: -------------------------------------------------------------------------------- 1 | { 2 | "generalSettings": { 3 | "shouldScanRepo": true 4 | }, 5 | "checkRunSettings": { 6 | "vulnerableCheckRunConclusionLevel": "failure" 7 | } 8 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 
28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 
62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 
102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 
133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. 
You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 
196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 
229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 
256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 
287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 
317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. 
If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 
386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 
421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | <one line to give the program's name and a brief idea of what it does.> 635 | Copyright (C) <year> <name of author> 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <https://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | <program> Copyright (C) <year> <name of author> 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <https://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <https://www.gnu.org/licenses/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include versioneer.py 2 | include scrapy_pipelines/_version.py 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Read more: `noffle/art-of-readme: Learn the art of writing quality READMEs.`_ 2 | 3 | ..
_`noffle/art-of-readme: Learn the art of writing quality READMEs.`: https://github.com/noffle/art-of-readme 4 | 5 | ================ 6 | Scrapy-Pipelines 7 | ================ 8 | 9 | Overview 10 | ======== 11 | 12 | .. image:: https://bestpractices.coreinfrastructure.org/projects/2828/badge 13 | :alt: CII Best Practices 14 | :target: https://bestpractices.coreinfrastructure.org/projects/2828 15 | 16 | .. image:: https://mperlet.github.io/pybadge/badges/9.43.svg 17 | :alt: pylint Score 18 | 19 | .. image:: https://img.shields.io/travis/scrapedia/scrapy-pipelines/master.svg 20 | :target: http://travis-ci.org/scrapedia/scrapy-pipelines 21 | :alt: Travis branch 22 | 23 | .. image:: https://codecov.io/gh/scrapedia/scrapy-pipelines/branch/master/graph/badge.svg 24 | :target: https://codecov.io/gh/scrapedia/scrapy-pipelines 25 | :alt: Coverage Report 26 | 27 | .. image:: https://codebeat.co/badges/fabc61ba-6a20-4bd1-bf73-a2f091a9ad80 28 | :target: https://codebeat.co/projects/github-com-scrapedia-scrapy-pipelines-master 29 | :alt: codebeat badge 30 | 31 | .. image:: https://api.codacy.com/project/badge/Grade/aeda92e058434a9eb2e8b0512a02235f 32 | :target: https://www.codacy.com/app/grammy-jiang/scrapy-pipelines?utm_source=github.com&utm_medium=referral&utm_content=scrapedia/scrapy-pipelines&utm_campaign=Badge_Grade 33 | 34 | .. image:: https://pyup.io/repos/github/scrapedia/scrapy-pipelines/shield.svg 35 | :target: https://pyup.io/repos/github/scrapedia/scrapy-pipelines/ 36 | :alt: Updates 37 | 38 | .. image:: https://snyk.io/test/github/scrapedia/scrapy-pipelines/badge.svg 39 | :target: https://snyk.io/test/github/scrapedia/scrapy-pipelines 40 | :alt: Known Vulnerabilities 41 | .. image:: https://img.shields.io/badge/code%20style-black-000000.svg 42 | :target: https://github.com/python/black 43 | :alt: Code style: black 44 | 45 | .. 
image:: https://img.shields.io/badge/License-GPLv3-blue.svg 46 | :target: https://www.gnu.org/licenses/gpl-3.0 47 | :alt: License: GPL v3 48 | 49 | Since Scrapy doesn't provide enough pipeline examples for different backends 50 | or databases, this repository provides several to demonstrate 51 | proper usage, including: 52 | 53 | * MongoDB 54 | * Redis (todo) 55 | * InfluxDB (todo) 56 | * LevelDB (todo) 57 | 58 | These pipelines also provide multiple ways to save or update items, and 59 | return the id created by the backend. 60 | 61 | Requirements 62 | ============= 63 | 64 | .. image:: https://pyup.io/repos/github/scrapedia/r18/python-3-shield.svg 65 | :target: https://pyup.io/repos/github/scrapedia/r18/ 66 | :alt: Python 3 67 | 68 | * Python 3.6+ 69 | * Works on Linux, Windows, Mac OSX 70 | 71 | Installation 72 | ============ 73 | 74 | .. image:: https://img.shields.io/pypi/v/scrapy-pipelines.svg 75 | :target: https://pypi.python.org/pypi/scrapy-pipelines 76 | :alt: PyPI 77 | .. image:: https://img.shields.io/pypi/pyversions/scrapy-pipelines.svg 78 | :target: https://pypi.python.org/pypi/scrapy-pipelines 79 | :alt: PyPI - Python Version 80 | .. image:: https://img.shields.io/pypi/wheel/scrapy-pipelines.svg 81 | :target: https://pypi.python.org/pypi/scrapy-pipelines 82 | :alt: PyPI - Wheel 83 | 84 | The quick way: 85 | 86 | pip install scrapy-pipelines 87 | 88 | For more details see the installation section in the documentation: 89 | https://scrapy-pipelines.readthedocs.io/en/latest/intro/installation.html 90 | 91 | Documentation 92 | ============= 93 | 94 | Documentation is available online at 95 | https://scrapy-pipelines.readthedocs.io/en/latest/ and in the docs directory.
96 | 97 | Community (blog, twitter, mail list, IRC) 98 | ========================================= 99 | 100 | *This section is kept the same as Scrapy's, with the intention of giving back to Scrapy.* 101 | 102 | See https://scrapy.org/community/ 103 | 104 | Contributing 105 | ============ 106 | 107 | *This section is kept the same as Scrapy's to make it easier when this repo 108 | is merged back into Scrapy.* 109 | 110 | See https://doc.scrapy.org/en/master/contributing.html 111 | 112 | Code of Conduct 113 | --------------- 114 | 115 | Please note that this project is released with a Contributor Code of Conduct 116 | (see https://github.com/scrapy/scrapy/blob/master/CODE_OF_CONDUCT.md). 117 | 118 | By participating in this project you agree to abide by its terms. 119 | Please report unacceptable behavior to opensource@scrapinghub.com. 120 | 121 | 122 | Companies using Scrapy 123 | ====================== 124 | 125 | *This section is kept the same as Scrapy's, with the intention of giving back to Scrapy.* 126 | 127 | See https://scrapy.org/companies/ 128 | 129 | Commercial Support 130 | ================== 131 | 132 | *This section is kept the same as Scrapy's, with the intention of giving back to Scrapy.* 133 | 134 | See https://scrapy.org/support/ 135 | 136 | TODO 137 | ==== 138 | 139 | * [X] Add index creation in open_spider() 140 | * [X] Add item_completed method 141 | * [X] Add signals for MongoDB document's id return 142 | * [ ] Add MongoDB document update 143 | * [ ] Add Percona Server for MongoDB docker support 144 | * [ ] Add Redis support 145 | * [ ] Add InfluxDB support 146 | * [ ] Add LevelDB support 147 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | notify: 3 | slack: 4 | default: 5 | url: "https://hooks.slack.com/services/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" 6 |
-------------------------------------------------------------------------------- /docker/MongoDB/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | # https://hub.docker.com/_/mongo/ 4 | mongo: 5 | container_name: dc-mongo 6 | env_file: 7 | - mongo.env 8 | image: mongo:latest 9 | networks: 10 | - mongo 11 | ports: 12 | - 27017:27017 13 | restart: always 14 | tty: true 15 | volumes: 16 | - ./mongo-db:/data/db 17 | - ./mongo-initdb.d:/docker-entrypoint-initdb.d 18 | # https://hub.docker.com/_/mongo-express/ 19 | mongo-express: 20 | container_name: dc-mongodb-express 21 | depends_on: 22 | - mongo 23 | env_file: 24 | - mongo-express.env 25 | image: mongo-express:latest 26 | links: 27 | - mongo 28 | networks: 29 | - mongo 30 | ports: 31 | - 8081:8081 32 | restart: always 33 | tty: true 34 | 35 | networks: 36 | mongo: 37 | driver: bridge 38 | -------------------------------------------------------------------------------- /docker/MongoDB/mongo-express.env: -------------------------------------------------------------------------------- 1 | ME_CONFIG_MONGODB_ADMINUSERNAME=root 2 | ME_CONFIG_MONGODB_ADMINPASSWORD=password 3 | ME_CONFIG_MONGODB_PORT=27017 4 | ME_CONFIG_MONGODB_SERVER=mongo 5 | -------------------------------------------------------------------------------- /docker/MongoDB/mongo-initdb.d/createUser.js: -------------------------------------------------------------------------------- 1 | db = db.getSiblingDB('test_db'), 2 | db.createUser({ 3 | user: "test_username", 4 | pwd: "test_password", 5 | roles: ["readWrite"] 6 | }); 7 | -------------------------------------------------------------------------------- /docker/MongoDB/mongo.env: -------------------------------------------------------------------------------- 1 | MONGO_INITDB_ROOT_USERNAME=root 2 | MONGO_INITDB_ROOT_PASSWORD=password 3 | MONGO_INITDB_DATABASE=admin 
-------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo==3.11.3 2 | scrapy==2.4.1 3 | sphinx==3.5.2 4 | txmongo==19.2.0 5 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import sys 14 | from pathlib import Path 15 | 16 | sys.path.insert(0, str(Path("/").joinpath(*Path(__file__).parts[:-3]))) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = "Scrapy Pipelines" 22 | copyright = "2019, Scrapedia" 23 | author = "Scrapedia" 24 | 25 | 26 | # -- General configuration --------------------------------------------------- 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 
31 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.coverage"] 32 | 33 | # Add any paths that contain templates here, relative to this directory. 34 | templates_path = ["_templates"] 35 | 36 | # List of patterns, relative to source directory, that match files and 37 | # directories to ignore when looking for source files. 38 | # This pattern also affects html_static_path and html_extra_path. 39 | exclude_patterns = [] 40 | 41 | 42 | # -- Options for HTML output ------------------------------------------------- 43 | 44 | # The theme to use for HTML and HTML Help pages. See the documentation for 45 | # a list of builtin themes. 46 | # 47 | html_theme = "alabaster" 48 | 49 | # Add any paths that contain custom static files (such as style sheets) here, 50 | # relative to this directory. They are copied after the builtin static files, 51 | # so a file named "default.css" will overwrite the builtin "default.css". 52 | html_static_path = ["_static"] 53 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. _topics-index: 2 | 3 | Scrapy Pipelines |version| documentation 4 | ======================================== 5 | 6 | .. 
toctree:: 7 | :hidden: 8 | :maxdepth: 2 9 | :caption: Contents: 10 | 11 | intro/installation 12 | pipelines/ItemPipeline 13 | pipelines/MongoDB 14 | 15 | items 16 | settings 17 | signals 18 | 19 | :doc:`intro/installation` 20 | Installation 21 | 22 | :doc:`pipelines/ItemPipeline` 23 | The root class for all pipelines 24 | 25 | :doc:`pipelines/MongoDB` 26 | Save items into MongoDB 27 | 28 | :doc:`items` 29 | Items used in these pipelines 30 | 31 | :doc:`settings` 32 | Settings for these pipelines 33 | 34 | :doc:`signals` 35 | Signals used in these pipelines 36 | 37 | Indices and tables 38 | ================== 39 | 40 | * :ref:`genindex` 41 | * :ref:`modindex` 42 | * :ref:`search` 43 | -------------------------------------------------------------------------------- /docs/source/intro/installation.rst: -------------------------------------------------------------------------------- 1 | .. _intro-installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | -------------------------------------------------------------------------------- /docs/source/items.rst: -------------------------------------------------------------------------------- 1 | .. _items: 2 | 3 | ===== 4 | Items 5 | ===== 6 | 7 | .. automodule:: scrapy_pipelines.items 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/source/pipelines/ItemPipeline.rst: -------------------------------------------------------------------------------- 1 | .. _pipelines-itempipeline: 2 | 3 | ============ 4 | ItemPipeline 5 | ============ 6 | 7 | .. autoclass:: scrapy_pipelines.pipelines.ItemPipeline 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/source/pipelines/MongoDB.rst: -------------------------------------------------------------------------------- 1 | .. _pipelines-MongoDB: 2 | 3 | ================ 4 | Pipeline MongoDB 5 | ================ 6 | 7 | .. 
autoclass:: scrapy_pipelines.pipelines.mongo.MongoPipeline 8 | :members: -------------------------------------------------------------------------------- /docs/source/settings.rst: -------------------------------------------------------------------------------- 1 | .. _settings: 2 | 3 | ======== 4 | Settings 5 | ======== 6 | 7 | .. automodule:: scrapy_pipelines.settings 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/source/signals.rst: -------------------------------------------------------------------------------- 1 | .. _signals: 2 | 3 | ======= 4 | Signals 5 | ======= 6 | 7 | .. automodule:: scrapy_pipelines.signals 8 | :members: 9 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | ignore=_version.py 3 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | exclude = ''' 3 | ( 4 | /( 5 | \.eggs # exclude a few common directories in the 6 | | \.git # root of the project 7 | | \.hg 8 | | \.mypy_cache 9 | | \.tox 10 | | \.venv 11 | | _build 12 | | buck-out 13 | | build 14 | | dist 15 | )/ 16 | | foo.py # also separately exclude a file named foo.py in 17 | # the root of the project 18 | | scrapy_pipelines/_version.py 19 | ) 20 | ''' 21 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = 3 | --cov=scrapy_pipelines tests/ 4 | --ignore=docker/ 5 | --numprocesses=auto 
6 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": [ 3 | "config:base" 4 | ] 5 | } 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | txmongo==19.2.0 2 | scrapy==2.4.1 3 | -------------------------------------------------------------------------------- /scrapy_pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The pipelines used to save items 3 | """ 4 | from ._version import get_versions 5 | 6 | __version__ = get_versions()["version"] 7 | del get_versions 8 | -------------------------------------------------------------------------------- /scrapy_pipelines/_version.py: -------------------------------------------------------------------------------- 1 | 2 | # This file helps to compute a version number in source trees obtained from 3 | # git-archive tarball (such as those provided by githubs download-from-tag 4 | # feature). Distribution tarballs (built by setup.py sdist) and build 5 | # directories (produced by setup.py build) will contain a much shorter file 6 | # that just contains the computed version number. 7 | 8 | # This file is released into the public domain. Generated by 9 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import os 15 | import re 16 | import subprocess 17 | import sys 18 | 19 | 20 | def get_keywords(): 21 | """Get the keywords needed to look up the version information.""" 22 | # these strings will be replaced by git during git-archive. 23 | # setup.py/versioneer.py will grep for the variable names, so they must 24 | # each be defined on a line of their own. 
_version.py will just call 25 | # get_keywords(). 26 | git_refnames = " (HEAD -> master)" 27 | git_full = "667b87c8ff490e87d95d03ca0aaa715b9ceda47d" 28 | git_date = "2021-03-10 10:20:01 +1100" 29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 30 | return keywords 31 | 32 | 33 | class VersioneerConfig: 34 | """Container for Versioneer configuration parameters.""" 35 | 36 | 37 | def get_config(): 38 | """Create, populate and return the VersioneerConfig() object.""" 39 | # these strings are filled in when 'setup.py versioneer' creates 40 | # _version.py 41 | cfg = VersioneerConfig() 42 | cfg.VCS = "git" 43 | cfg.style = "pep440" 44 | cfg.tag_prefix = "" 45 | cfg.parentdir_prefix = "" 46 | cfg.versionfile_source = "scrapy_pipelines/_version.py" 47 | cfg.verbose = False 48 | return cfg 49 | 50 | 51 | class NotThisMethod(Exception): 52 | """Exception raised if a method is not valid for the current scenario.""" 53 | 54 | 55 | LONG_VERSION_PY = {} 56 | HANDLERS = {} 57 | 58 | 59 | def register_vcs_handler(vcs, method): # decorator 60 | """Decorator to mark a method as the handler for a particular VCS.""" 61 | def decorate(f): 62 | """Store f in HANDLERS[vcs][method].""" 63 | if vcs not in HANDLERS: 64 | HANDLERS[vcs] = {} 65 | HANDLERS[vcs][method] = f 66 | return f 67 | return decorate 68 | 69 | 70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 71 | env=None): 72 | """Call the given command(s).""" 73 | assert isinstance(commands, list) 74 | p = None 75 | for c in commands: 76 | try: 77 | dispcmd = str([c] + args) 78 | # remember shell=False, so use git.cmd on windows, not just git 79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 80 | stdout=subprocess.PIPE, 81 | stderr=(subprocess.PIPE if hide_stderr 82 | else None)) 83 | break 84 | except EnvironmentError: 85 | e = sys.exc_info()[1] 86 | if e.errno == errno.ENOENT: 87 | continue 88 | if verbose: 89 | print("unable to run %s" % dispcmd) 90 | print(e) 91 | 
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of an enclosing directory.

    Unpacked source tarballs usually live in a directory named after the
    project and its version. Starting at root, the directory itself and up
    to two of its ancestors are examined; the first whose basename starts
    with parentdir_prefix supplies the version (the remainder of the name).

    Raises NotThisMethod when none of the examined directories match.
    """
    tried = []
    candidate = root

    for _ in range(3):
        name = os.path.basename(candidate)
        if name.startswith(parentdir_prefix):
            return {"version": name[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        # no match here: remember the directory and climb one level
        tried.append(candidate)
        candidate = os.path.dirname(candidate)

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    keywords is a mapping with the entries "refnames", "full" and
    optionally "date", as expanded by git-archive. The version is taken
    from the first tag (in sorted order) that starts with tag_prefix.

    Returns a dict with the keys "version", "full-revisionid", "dirty",
    "error" and "date". When no suitable tag exists the version is
    "0+unknown" and "error" explains why.

    Raises NotThisMethod when there are no keywords, when the "refnames"
    keyword is missing, or when the keywords were never expanded.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    if "refnames" not in keywords:
        # Without refnames there is nothing to derive a version from. Signal
        # "try another method" instead of leaking a KeyError to callers that
        # only handle NotThisMethod.
        raise NotThisMethod("no refnames keyword, cannot derive a version")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r'\d', r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    pieces is the version-pieces dict. Its "closest-tag" entry may be
    absent, or present with the value None when the tree has no tag; both
    cases are treated as "no + yet".
    """
    # dict.get's default only applies to a missing key, so an explicit None
    # value has to be coerced to "" before the membership test.
    if "+" in (pieces.get("closest-tag") or ""):
        return "."
    return "+"
def render_pep440_pre(pieces):
    """Render as TAG[.post.devDISTANCE]; the dirty flag is ignored.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % pieces["distance"]

    distance = pieces["distance"]
    if not distance:
        # exactly on the tag: the tag is the version
        return tag
    return tag + ".post.dev%d" % distance
def render_git_describe(pieces):
    """Render as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = "%s-%d-g%s" % (tag, pieces["distance"], pieces["short"])
    else:
        # exactly on the tag: no distance/hash part
        described = tag

    suffix = "-dirty" if pieces["dirty"] else ""
    return described + suffix
def render(pieces, style):
    """Turn the version pieces into a version dict using the given style.

    When pieces carries an error, an "unknown" version dict describing the
    error is returned and style is not consulted. An empty style or
    "default" means "pep440". Raises ValueError for an unrecognised style.
    """
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    # the default
    chosen = "pep440" if (not style or style == "default") else style

    renderers = {
        "pep440": render_pep440,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    # non-string styles can never match a known name
    renderer = renderers.get(chosen) if isinstance(chosen, str) else None
    if renderer is None:
        raise ValueError("unknown style '%s'" % style)

    return {"version": renderer(pieces), "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
class BSONItem(Item):
    """
    Scrapy item that declares an `_id` field.

    Pymongo creates `_id` automatically in the object after inserting
    """

    # field reserved for the MongoDB document identifier
    _id = Field()
class ItemPipeline(ABC):
    """
    Abstract base class for item pipelines.

    Concrete pipelines implement ``from_settings``, ``open_spider``,
    ``close_spider`` and ``process_item``.
    """

    def __init__(self, settings: Settings = None):
        """
        Keep the settings; the crawler is attached later by ``from_crawler``.

        :param settings: settings object stored on the instance
        :type settings: Settings
        """
        self.crawler: Crawler = None
        self.settings = settings

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        """
        Build a pipeline instance for the given crawler.

        The package default settings are loaded into the crawler settings at
        "default" priority, then the pipeline is created through
        ``from_settings``. If that raises ``AttributeError`` the class is
        instantiated without arguments instead.

        :param crawler: crawler the pipeline is created for
        :type crawler: Crawler
        :return: the pipeline, with its ``crawler`` attribute set
        :rtype:
        """
        # the crawler settings may be frozen; lift that only while the
        # defaults are being loaded
        with unfreeze_settings(crawler.settings) as unfrozen:
            defaults_priority = SETTINGS_PRIORITIES["default"]
            unfrozen.setmodule(module=default_settings, priority=defaults_priority)

        try:
            instance = cls.from_settings(crawler.settings)
        except AttributeError:
            instance = cls()

        instance.crawler = crawler
        return instance

    @classmethod
    @abstractmethod
    def from_settings(cls, settings: Settings):
        """
        Build a pipeline instance from a settings object.

        :param settings: settings handed to the constructor
        :type settings: Settings
        :return: a new instance of the class
        :rtype:
        """
        return cls(settings=settings)

    @abstractmethod
    def open_spider(self, spider: Spider):
        """
        Hook for the moment a spider is opened.

        :param spider: the spider being opened
        :type spider: Spider
        :return:
        :rtype:
        """

    @abstractmethod
    def close_spider(self, spider: Spider):
        """
        Hook for the moment a spider is closed.

        :param spider: the spider being closed
        :type spider: Spider
        :return:
        :rtype:
        """

    @abstractmethod
    def process_item(self, item: Item, spider: Spider) -> Item:
        """
        Hook for handling a single item.

        :param item: the item to process
        :type item: Item
        :param spider: the spider the item belongs to
        :type spider: Spider
        :return: the processed item
        :rtype: Item
        """
def get_args(func: Callable) -> Tuple[str, ...]:
    """
    Return the parameter names of a callable, in declaration order.

    :param func: the callable to inspect
    :type func: callable
    :return: the names of all parameters in the callable's signature
    :rtype: tuple
    """
    # iterating the parameters mapping yields its keys, i.e. the names
    return tuple(inspect.signature(func).parameters)
from_settings(cls, settings: Settings): 74 | """ 75 | 76 | :param settings: 77 | :type settings: Settings 78 | :return: 79 | :rtype: MongoPipeline 80 | """ 81 | uri = settings["PIPELINE_MONGO_URI"] 82 | return cls(uri=uri, settings=settings) 83 | 84 | def _get_args_from_settings(self, func: Callable) -> Dict[str, str]: 85 | """ 86 | 87 | :param func: 88 | :type func: Callable 89 | :return: 90 | :rtype: Dict[str, str] 91 | """ 92 | func_args = dict() 93 | for arg in get_args(func): 94 | key = "PIPELINE_MONGO_{arg}".format(arg=arg.upper()) 95 | if key in self.settings: 96 | func_args.update({arg: self.settings[key]}) 97 | return func_args 98 | 99 | def _get_callable(self, callable_: Callable, **kwargs): 100 | """ 101 | 102 | :param callable_: 103 | :param kwargs: 104 | :return: 105 | :rtype: 106 | """ 107 | args = self._get_args_from_settings(func=callable_) 108 | args.update(kwargs) 109 | return callable_(**args) 110 | 111 | @inlineCallbacks 112 | def open_spider(self, spider: Spider): 113 | """ 114 | 115 | :param spider: 116 | :type spider: Spider 117 | :return: 118 | :rtype: 119 | """ 120 | self.mongo = yield self._get_callable(ConnectionPool) 121 | self.database = yield self._get_callable( 122 | Database, 123 | factory=self.mongo, 124 | database_name=self.settings.get("PIPELINE_MONGO_DATABASE"), 125 | ) 126 | if all( 127 | ( 128 | self.settings.get("PIPELINE_MONGO_USERNAME"), 129 | self.settings.get("PIPELINE_MONGO_PASSWORD"), 130 | ) 131 | ): 132 | yield self._get_callable( 133 | self.database.authenticate, 134 | name=self.settings.get("PIPELINE_MONGO_USERNAME"), 135 | ) 136 | try: 137 | yield self.database.command("listCollections") 138 | except OperationFailure as err: 139 | LOGGER.error(str(err)) 140 | self.crawler.engine.close_spider(spider=spider, reason=str(err)) 141 | else: 142 | self.collection = yield self._get_callable( 143 | Collection, 144 | database=self.database, 145 | name=self.settings.get("PIPELINE_MONGO_COLLECTION"), 146 | ) 147 | yield 
self.create_indexes(spider=spider) 148 | 149 | LOGGER.info('MongoPipeline is opened with "%s"', self.uri) 150 | 151 | @inlineCallbacks 152 | def close_spider(self, spider: Spider): 153 | """ 154 | 155 | :param spider: 156 | :type spider: Spider 157 | :return: 158 | :rtype: 159 | """ 160 | yield self.mongo.disconnect() 161 | 162 | LOGGER.info('MongoPipeline is closed with "%s"', self.uri) 163 | 164 | @inlineCallbacks 165 | def create_indexes(self, spider: Spider): 166 | """ 167 | 168 | :param spider: 169 | :type spider: Spider 170 | :return: 171 | :rtype: 172 | """ 173 | indexes = self.settings.get("PIPELINE_MONGO_INDEXES", list()) 174 | for field, _order, *args in indexes: 175 | sort_fields = txsort(_order(field)) 176 | try: 177 | kwargs = args[0] 178 | except IndexError: 179 | kwargs = {} 180 | _ = yield self.collection.create_index(sort_fields, **kwargs) 181 | 182 | @inlineCallbacks 183 | def process_item(self, item: Item, spider: Spider) -> Item: 184 | """ 185 | 186 | :param item: 187 | :type item: Item 188 | :param spider: 189 | :type spider: Spider 190 | :return: 191 | :rtype: Item 192 | """ 193 | result = yield self.collection.insert_one(document=dict(item)) 194 | 195 | _item = self.item_completed(result, item, spider) 196 | 197 | return _item 198 | 199 | def item_completed(self, result: str, item: Item, spider: Spider) -> Item: 200 | """ 201 | 202 | :param result: 203 | :type result: str 204 | :param item: 205 | :type item: Item 206 | :param spider: 207 | :type spider: Spider 208 | :return: 209 | :rtype: Item 210 | """ 211 | return item 212 | 213 | @inlineCallbacks 214 | def process_item_id( 215 | self, signal: object, sender: Crawler, item: Item, spider: Spider 216 | ) -> InsertOneResult: 217 | """ 218 | 219 | :param signal: 220 | :type signal: object 221 | :param sender: 222 | :type sender: Crawler 223 | :param item: 224 | :type item: Item 225 | :param spider: 226 | :type spider: Spider 227 | :return: 228 | :rtype: InsertOneResult 229 | """ 230 | result = 
@contextmanager
def unfreeze_settings(settings: Settings) -> Generator[Settings, None, None]:
    """
    Temporarily clear the ``frozen`` flag of a settings object.

    Inside the ``with`` block ``settings.frozen`` is ``False``; on exit,
    including exit through an exception, the previous value is put back.

    :param settings: the settings object to unfreeze
    :type settings: Settings
    :return: a context manager yielding the same settings object
    :rtype: Generator[Settings, None, None]
    """
    was_frozen = settings.frozen
    settings.frozen = False
    try:
        yield settings
    finally:
        settings.frozen = was_frozen
4 | 5 | For more information about these settings you can read the settings 6 | documentation in docs/topics/settings.rst 7 | 8 | Scrapy developers, if you add a setting here remember to: 9 | 10 | * add it in alphabetical order 11 | * group similar settings without leaving blank lines 12 | * add its documentation to the available settings documentation 13 | (docs/topics/settings.rst) 14 | 15 | """ 16 | PIPELINE_MONGO_URI = "mongodb://127.0.0.1:27017" 17 | # PIPELINE_MONGO_POOL_SIZE = 1 18 | # PIPELINE_MONGO_SSL_CONTEXT_FACTORY = None 19 | # PIPELINE_MONGO_PING_INTERVAL = 10 20 | # PIPELINE_MONGO_PING_TIMEOUT = 10 21 | 22 | PIPELINE_MONGO_DATABASE = "scrapy_project_database" 23 | # PIPELINE_MONGO_WRITE_CONCERN = None 24 | # PIPELINE_MONGO_CODEC_OPTION = None 25 | 26 | PIPELINE_MONGO_USERNAME = "USERNAME" 27 | PIPELINE_MONGO_PASSWORD = "PASSWORD" 28 | # PIPELINE_MONGO_MECHANISM = "DEFAULT" 29 | 30 | PIPELINE_MONGO_COLLECTION = "scrapy_project_collection" 31 | 32 | # PIPELINE_MONGO_OPTIONS_ = "OPTIONS_" 33 | 34 | # PIPELINE_MONGO_INDEXES = "INDEXES" 35 | 36 | # PIPELINE_MONGO_PROCESS_ITEM = "PROCESS_ITEM" 37 | 38 | # from txmongo.filter import ASCENDING, DESCENDING 39 | # 40 | # PIPELINE_MONGO_INDEXES = [ 41 | # ("key_asc", ASCENDING, {"name": "index_key_asc"}), 42 | # ("key_des", DESCENDING, {"name": "index_key_des"}), 43 | # ("key_unique", DESCENDING, {"name": "index_key_unique", "unique": True}), 44 | # ] 45 | -------------------------------------------------------------------------------- /scrapy_pipelines/signals.py: -------------------------------------------------------------------------------- 1 | """ 2 | Signals for the pipelines 3 | """ 4 | item_id = object() 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = scrapy_pipelines/_version.py 5 | tag_prefix = 6 | 
"""
Python package configuration
"""
from setuptools import find_packages, setup

import versioneer

extras_require = {}

# Read the README as UTF-8 explicitly: the default encoding of open() depends
# on the locale and can fail on non-ASCII characters.
with open("README.rst", "r", encoding="utf-8") as fh:
    LONG_DESCRIPTION = fh.read()

setup(
    name="Scrapy-Pipelines",
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    url="https://github.com/scrapedia/scrapy-pipelines",
    description="A collection of scrapy item pipelines",
    long_description=LONG_DESCRIPTION,
    author="Scrapedia",
    author_email="Scrapedia@outlook.com",
    maintainer="Scrapedia",
    maintainer_email="Scrapedia@outlook.com",
    license="GPLv3",
    packages=find_packages(exclude=("tests", "tests.*")),
    include_package_data=True,
    zip_safe=False,
    classifiers=[
        "Framework :: Scrapy",
        "Development Status :: 2 - Pre-Alpha",
        "Environment :: Plugins",
        "Intended Audience :: Developers",
        "Operating System :: OS Independent",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Topic :: Internet :: WWW/HTTP",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    install_requires=["scrapy", "txmongo"],
    extras_require=extras_require,
)
class TestGetArgs(TestCase):
    """
    Test the ``get_args`` helper of the MongoPipeline module
    """

    def test_get_args(self):
        """
        ``get_args`` reports the parameter names in declaration order

        :return:
        """

        def sample(arg_1, arg_2, arg_3):
            return arg_1, arg_2, arg_3

        expected = ["arg_1", "arg_2", "arg_3"]

        self.assertSequenceEqual(get_args(sample), expected)
"index_test_unique", 66 | "unique": True, 67 | "partialFilterExpression": {"test_unique": {"$exists": True}}, 68 | }, 69 | ), 70 | ], 71 | } 72 | 73 | @inlineCallbacks 74 | def setUp(self) -> None: 75 | self.settings = Settings() 76 | self.settings.setmodule(module=default_settings) 77 | self.settings.setdict(self.mongo_settings) 78 | self.spider = Spider(name="TestMongoPipeline") 79 | self.pipe = MongoPipeline.from_settings(settings=self.settings) 80 | yield self.pipe.open_spider(spider=None) 81 | 82 | @inlineCallbacks 83 | def tearDown(self) -> None: 84 | yield self.pipe.close_spider(spider=None) 85 | 86 | @inlineCallbacks 87 | def test_create_indexes(self) -> None: 88 | """ 89 | 90 | :return: 91 | """ 92 | _index_info = { 93 | "_id_": { 94 | "key": SON([("_id", 1)]), 95 | "name": "_id_", 96 | "ns": "test_db.test_coll", 97 | "v": 2, 98 | }, 99 | "test_1": { 100 | "key": SON([("test", 1)]), 101 | "name": "test_1", 102 | "ns": "test_db.test_coll", 103 | "v": 2, 104 | }, 105 | "index_test_asc": { 106 | "key": SON([("test_asc", 1)]), 107 | "name": "index_test_asc", 108 | "ns": "test_db.test_coll", 109 | "v": 2, 110 | }, 111 | "index_test_des": { 112 | "key": SON([("test_des", -1)]), 113 | "name": "index_test_des", 114 | "ns": "test_db.test_coll", 115 | "v": 2, 116 | }, 117 | "index_test_unique": { 118 | "key": SON([("test_unique", -1)]), 119 | "name": "index_test_unique", 120 | "ns": "test_db.test_coll", 121 | "partialFilterExpression": {"test_unique": {"$exists": True}}, 122 | "unique": True, 123 | "v": 2, 124 | }, 125 | } 126 | index_info = yield self.pipe.collection.index_information() 127 | self.assertDictEqual(index_info, _index_info) 128 | 129 | @inlineCallbacks 130 | def test_process_item(self): 131 | """ 132 | 133 | :return: 134 | """ 135 | item = TempItem({"a": 0, "b": 1}) 136 | result = yield self.pipe.process_item(item=item, spider=self.spider) 137 | 138 | self.assertDictEqual(dict(result), dict(item)) 139 | 140 | def test_item_completed(self): 141 | """ 
142 | 143 | :return: 144 | """ 145 | _item = TempItem({"a": 2, "b": 3}) 146 | item = self.pipe.item_completed(None, _item, None) 147 | self.assertDictEqual(dict(_item), dict(item)) 148 | 149 | @inlineCallbacks 150 | def test_process_item_id(self): 151 | """ 152 | 153 | :return: 154 | """ 155 | item = TempItem({"a": 4, "b": 5}) 156 | result = yield self.pipe.process_item_id( 157 | signal=object(), sender=None, item=item, spider=self.spider 158 | ) 159 | 160 | self.assertIsInstance(result, InsertOneResult) 161 | -------------------------------------------------------------------------------- /tests/test_settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | test the functions in settings 3 | """ 4 | from unittest import TestCase 5 | 6 | from scrapy.settings import Settings 7 | 8 | from scrapy_pipelines.settings import unfreeze_settings 9 | 10 | 11 | class TestSettings(TestCase): 12 | """ 13 | Test the functions in default settings 14 | """ 15 | 16 | def setUp(self) -> None: 17 | self.settings = Settings() 18 | self.settings.freeze() 19 | 20 | def test_unfreeze_settings_succeed(self): 21 | """ 22 | 23 | :return: 24 | """ 25 | self.assertEqual(self.settings.frozen, True) 26 | with unfreeze_settings(self.settings): 27 | self.assertEqual(self.settings.frozen, False) 28 | self.assertEqual(self.settings.frozen, True) 29 | 30 | def test_unfreeze_settings_failed(self): 31 | """ 32 | 33 | :return: 34 | """ 35 | with self.assertRaises(Exception): 36 | with unfreeze_settings(self.settings): 37 | raise Exception 38 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py36,py37 3 | 4 | [testenv] 5 | commands = 6 | pytest 7 | deps = 8 | -rrequirements.txt 9 | -rtests/requirements.txt 10 | passenv = 11 | PYTHONPATH 12 | 13 | [docs] 14 | changedir = docs 15 | deps = 16 | 
-rdocs/requirements.txt 17 | 18 | [testenv:docs] 19 | changedir = {[docs]changedir} 20 | commands = 21 | sphinx-build -W -b html source {envtmpdir}/html 22 | deps = {[docs]deps} 23 | 24 | [testenv:docs-coverage] 25 | changedir = {[docs]changedir} 26 | commands = 27 | sphinx-build -b coverage source {envtmpdir}/coverage 28 | deps = {[docs]deps} 29 | 30 | [testenv:docs-links] 31 | changedir = {[docs]changedir} 32 | commands = 33 | sphinx-build -W -b linkcheck source {envtmpdir}/linkcheck 34 | deps = {[docs]deps} 35 | -------------------------------------------------------------------------------- /versioneer.py: -------------------------------------------------------------------------------- 1 | 2 | # Version: 0.18 3 | 4 | """The Versioneer - like a rocketeer, but for versions. 5 | 6 | The Versioneer 7 | ============== 8 | 9 | * like a rocketeer, but for versions! 10 | * https://github.com/warner/python-versioneer 11 | * Brian Warner 12 | * License: Public Domain 13 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy 14 | * [![Latest Version] 15 | (https://pypip.in/version/versioneer/badge.svg?style=flat) 16 | ](https://pypi.python.org/pypi/versioneer/) 17 | * [![Build Status] 18 | (https://travis-ci.org/warner/python-versioneer.png?branch=master) 19 | ](https://travis-ci.org/warner/python-versioneer) 20 | 21 | This is a tool for managing a recorded version number in distutils-based 22 | python projects. The goal is to remove the tedious and error-prone "update 23 | the embedded version string" step from your release process. Making a new 24 | release should be as easy as recording a new tag in your version-control 25 | system, and maybe making new tarballs. 
26 | 27 | 28 | ## Quick Install 29 | 30 | * `pip install versioneer` to somewhere in your $PATH 31 | * add a `[versioneer]` section to your setup.cfg (see below) 32 | * run `versioneer install` in your source tree, commit the results 33 | 34 | ## Version Identifiers 35 | 36 | Source trees come from a variety of places: 37 | 38 | * a version-control system checkout (mostly used by developers) 39 | * a nightly tarball, produced by build automation 40 | * a snapshot tarball, produced by a web-based VCS browser, like github's 41 | "tarball from tag" feature 42 | * a release tarball, produced by "setup.py sdist", distributed through PyPI 43 | 44 | Within each source tree, the version identifier (either a string or a number, 45 | this tool is format-agnostic) can come from a variety of places: 46 | 47 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows 48 | about recent "tags" and an absolute revision-id 49 | * the name of the directory into which the tarball was unpacked 50 | * an expanded VCS keyword ($Id$, etc) 51 | * a `_version.py` created by some earlier build step 52 | 53 | For released software, the version identifier is closely related to a VCS 54 | tag. Some projects use tag names that include more than just the version 55 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool 56 | needs to strip the tag prefix to extract the version identifier. For 57 | unreleased software (between tags), the version identifier should provide 58 | enough information to help developers recreate the same tree, while also 59 | giving them an idea of roughly how old the tree is (after version 1.2, before 60 | version 1.3).
Many VCS systems can report a description that captures this, 61 | for example `git describe --tags --dirty --always` reports things like 62 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the 63 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has 64 | uncommitted changes). 65 | 66 | The version identifier is used for multiple purposes: 67 | 68 | * to allow the module to self-identify its version: `myproject.__version__` 69 | * to choose a name and prefix for a 'setup.py sdist' tarball 70 | 71 | ## Theory of Operation 72 | 73 | Versioneer works by adding a special `_version.py` file into your source 74 | tree, where your `__init__.py` can import it. This `_version.py` knows how to 75 | dynamically ask the VCS tool for version information at import time. 76 | 77 | `_version.py` also contains `$Revision$` markers, and the installation 78 | process marks `_version.py` to have this marker rewritten with a tag name 79 | during the `git archive` command. As a result, generated tarballs will 80 | contain enough information to get the proper version. 81 | 82 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to 83 | the top level of your source tree, next to `setup.py` and the `setup.cfg` 84 | that configures it. This overrides several distutils/setuptools commands to 85 | compute the version when invoked, and changes `setup.py build` and `setup.py 86 | sdist` to replace `_version.py` with a small static file that contains just 87 | the generated version data. 88 | 89 | ## Installation 90 | 91 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 92 | 93 | ## Version-String Flavors 94 | 95 | Code which uses Versioneer can learn about its version string at runtime by 96 | importing `_version` from your main `__init__.py` file and running the 97 | `get_versions()` function. From the "outside" (e.g.
in `setup.py`), you can 98 | import the top-level `versioneer.py` and run `get_versions()`. 99 | 100 | Both functions return a dictionary with different flavors of version 101 | information: 102 | 103 | * `['version']`: A condensed version string, rendered using the selected 104 | style. This is the most commonly used value for the project's version 105 | string. The default "pep440" style yields strings like `0.11`, 106 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section 107 | below for alternative styles. 108 | 109 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the 110 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". 111 | 112 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the 113 | commit date in ISO 8601 format. This will be None if the date is not 114 | available. 115 | 116 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that 117 | this is only accurate if run in a VCS checkout, otherwise it is likely to 118 | be False or None 119 | 120 | * `['error']`: if the version string could not be computed, this will be set 121 | to a string describing the problem, otherwise it will be None. It may be 122 | useful to throw an exception in setup.py if this is set, to avoid e.g. 123 | creating tarballs with a version string of "unknown". 124 | 125 | Some variants are more useful than others. Including `full-revisionid` in a 126 | bug report should allow developers to reconstruct the exact code being tested 127 | (or indicate the presence of local changes that should be shared with the 128 | developers). `version` is suitable for display in an "about" box or a CLI 129 | `--version` output: it can be easily compared against release notes and lists 130 | of bugs fixed in various releases. 
131 | 132 | The installer adds the following text to your `__init__.py` to place a basic 133 | version in `YOURPROJECT.__version__`: 134 | 135 | from ._version import get_versions 136 | __version__ = get_versions()['version'] 137 | del get_versions 138 | 139 | ## Styles 140 | 141 | The setup.cfg `style=` configuration controls how the VCS information is 142 | rendered into a version string. 143 | 144 | The default style, "pep440", produces a PEP440-compliant string, equal to the 145 | un-prefixed tag name for actual releases, and containing an additional "local 146 | version" section with more detail for in-between builds. For Git, this is 147 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags 148 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the 149 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and 150 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released 151 | software (exactly equal to a known tag), the identifier will only contain the 152 | stripped tag, e.g. "0.11". 153 | 154 | Other styles are available. See [details.md](details.md) in the Versioneer 155 | source tree for descriptions. 156 | 157 | ## Debugging 158 | 159 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend 160 | to return a version of "0+unknown". To investigate the problem, run `setup.py 161 | version`, which will run the version-lookup code in a verbose mode, and will 162 | display the full contents of `get_versions()` (including the `error` string, 163 | which may help identify what went wrong). 164 | 165 | ## Known Limitations 166 | 167 | Some situations are known to cause problems for Versioneer. This details the 168 | most significant ones. More can be found on Github 169 | [issues page](https://github.com/warner/python-versioneer/issues). 
170 | 171 | ### Subprojects 172 | 173 | Versioneer has limited support for source trees in which `setup.py` is not in 174 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are 175 | two common reasons why `setup.py` might not be in the root: 176 | 177 | * Source trees which contain multiple subprojects, such as 178 | [Buildbot](https://github.com/buildbot/buildbot), which contains both 179 | "master" and "slave" subprojects, each with their own `setup.py`, 180 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI 181 | distributions (and upload multiple independently-installable tarballs). 182 | * Source trees whose main purpose is to contain a C library, but which also 183 | provide bindings to Python (and perhaps other languages) in subdirectories. 184 | 185 | Versioneer will look for `.git` in parent directories, and most operations 186 | should get the right version string. However `pip` and `setuptools` have bugs 187 | and implementation details which frequently cause `pip install .` from a 188 | subproject directory to fail to find a correct version string (so it usually 189 | defaults to `0+unknown`). 190 | 191 | `pip install --editable .` should work correctly. `setup.py install` might 192 | work too. 193 | 194 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in 195 | some later version. 196 | 197 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking 198 | this issue. The discussion in 199 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the 200 | issue from the Versioneer side in more detail. 201 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and 202 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve 203 | pip to let Versioneer work correctly.
204 | 205 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the 206 | `setup.cfg`, so subprojects were completely unsupported with those releases. 207 | 208 | ### Editable installs with setuptools <= 18.5 209 | 210 | `setup.py develop` and `pip install --editable .` allow you to install a 211 | project into a virtualenv once, then continue editing the source code (and 212 | test) without re-installing after every change. 213 | 214 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a 215 | convenient way to specify executable scripts that should be installed along 216 | with the python package. 217 | 218 | These both work as expected when using modern setuptools. When using 219 | setuptools-18.5 or earlier, however, certain operations will cause 220 | `pkg_resources.DistributionNotFound` errors when running the entrypoint 221 | script, which must be resolved by re-installing the package. This happens 222 | when the install happens with one version, then the egg_info data is 223 | regenerated while a different version is checked out. Many setup.py commands 224 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into 225 | a different virtualenv), so this can be surprising. 226 | 227 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes 228 | this one, but upgrading to a newer version of setuptools should probably 229 | resolve it. 230 | 231 | ### Unicode version strings 232 | 233 | While Versioneer works (and is continually tested) with both Python 2 and 234 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions. 235 | Newer releases probably generate unicode version strings on py2. It's not 236 | clear that this is wrong, but it may be surprising for applications when they 237 | write these strings to a network connection or include them in bytes-oriented 238 | APIs like cryptographic checksums.
239 | 240 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates 241 | this question. 242 | 243 | 244 | ## Updating Versioneer 245 | 246 | To upgrade your project to a new release of Versioneer, do the following: 247 | 248 | * install the new Versioneer (`pip install -U versioneer` or equivalent) 249 | * edit `setup.cfg`, if necessary, to include any new configuration settings 250 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. 251 | * re-run `versioneer install` in your source tree, to replace 252 | `SRC/_version.py` 253 | * commit any changed files 254 | 255 | ## Future Directions 256 | 257 | This tool is designed to be easily extended to other version-control 258 | systems: all VCS-specific components are in separate directories like 259 | src/git/ . The top-level `versioneer.py` script is assembled from these 260 | components by running make-versioneer.py . In the future, make-versioneer.py 261 | will take a VCS name as an argument, and will construct a version of 262 | `versioneer.py` that is specific to the given VCS. It might also take the 263 | configuration arguments that are currently provided manually during 264 | installation by editing setup.py . Alternatively, it might go the other 265 | direction and include code from all supported VCS systems, reducing the 266 | number of intermediate scripts. 267 | 268 | 269 | ## License 270 | 271 | To make Versioneer easier to embed, all its code is dedicated to the public 272 | domain. The `_version.py` that it creates is also in the public domain. 273 | Specifically, both are released under the Creative Commons "Public Domain 274 | Dedication" license (CC0-1.0), as described in 275 | https://creativecommons.org/publicdomain/zero/1.0/ .
276 | 277 | """ 278 | 279 | from __future__ import print_function 280 | try: 281 | import configparser 282 | except ImportError: 283 | import ConfigParser as configparser 284 | import errno 285 | import json 286 | import os 287 | import re 288 | import subprocess 289 | import sys 290 | 291 | 292 | class VersioneerConfig: 293 | """Container for Versioneer configuration parameters.""" 294 | 295 | 296 | def get_root(): 297 | """Get the project root directory. 298 | 299 | We require that all commands are run from the project root, i.e. the 300 | directory that contains setup.py, setup.cfg, and versioneer.py . 301 | """ 302 | root = os.path.realpath(os.path.abspath(os.getcwd())) 303 | setup_py = os.path.join(root, "setup.py") 304 | versioneer_py = os.path.join(root, "versioneer.py") 305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 306 | # allow 'python path/to/setup.py COMMAND' 307 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) 308 | setup_py = os.path.join(root, "setup.py") 309 | versioneer_py = os.path.join(root, "versioneer.py") 310 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): 311 | err = ("Versioneer was unable to run the project root directory. " 312 | "Versioneer requires setup.py to be executed from " 313 | "its immediate directory (like 'python setup.py COMMAND'), " 314 | "or in a way that lets it use sys.argv[0] to find the root " 315 | "(like 'python path/to/setup.py COMMAND').") 316 | raise VersioneerBadRootError(err) 317 | try: 318 | # Certain runtime workflows (setup.py install/develop in a setuptools 319 | # tree) execute all dependencies in a single python process, so 320 | # "versioneer" may be imported multiple times, and python's shared 321 | # module-import table will cache the first one. So we can't use 322 | # os.path.dirname(__file__), as that will find whichever 323 | # versioneer.py was first imported, even in later projects. 
324 | me = os.path.realpath(os.path.abspath(__file__)) 325 | me_dir = os.path.normcase(os.path.splitext(me)[0]) 326 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) 327 | if me_dir != vsr_dir: 328 | print("Warning: build in %s is using versioneer.py from %s" 329 | % (os.path.dirname(me), versioneer_py)) 330 | except NameError: 331 | pass 332 | return root 333 | 334 | 335 | def get_config_from_root(root): 336 | """Read the project setup.cfg file to determine Versioneer config.""" 337 | # This might raise EnvironmentError (if setup.cfg is missing), or 338 | # configparser.NoSectionError (if it lacks a [versioneer] section), or 339 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at 340 | # the top of versioneer.py for instructions on writing your setup.cfg . 341 | setup_cfg = os.path.join(root, "setup.cfg") 342 | parser = configparser.SafeConfigParser() 343 | with open(setup_cfg, "r") as f: 344 | parser.readfp(f) 345 | VCS = parser.get("versioneer", "VCS") # mandatory 346 | 347 | def get(parser, name): 348 | if parser.has_option("versioneer", name): 349 | return parser.get("versioneer", name) 350 | return None 351 | cfg = VersioneerConfig() 352 | cfg.VCS = VCS 353 | cfg.style = get(parser, "style") or "" 354 | cfg.versionfile_source = get(parser, "versionfile_source") 355 | cfg.versionfile_build = get(parser, "versionfile_build") 356 | cfg.tag_prefix = get(parser, "tag_prefix") 357 | if cfg.tag_prefix in ("''", '""'): 358 | cfg.tag_prefix = "" 359 | cfg.parentdir_prefix = get(parser, "parentdir_prefix") 360 | cfg.verbose = get(parser, "verbose") 361 | return cfg 362 | 363 | 364 | class NotThisMethod(Exception): 365 | """Exception raised if a method is not valid for the current scenario.""" 366 | 367 | 368 | # these dictionaries contain VCS-specific tools 369 | LONG_VERSION_PY = {} 370 | HANDLERS = {} 371 | 372 | 373 | def register_vcs_handler(vcs, method): # decorator 374 | """Decorator to mark a method as the handler for a 
particular VCS.""" 375 | def decorate(f): 376 | """Store f in HANDLERS[vcs][method].""" 377 | if vcs not in HANDLERS: 378 | HANDLERS[vcs] = {} 379 | HANDLERS[vcs][method] = f 380 | return f 381 | return decorate 382 | 383 | 384 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, 385 | env=None): 386 | """Call the given command(s).""" 387 | assert isinstance(commands, list) 388 | p = None 389 | for c in commands: 390 | try: 391 | dispcmd = str([c] + args) 392 | # remember shell=False, so use git.cmd on windows, not just git 393 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 394 | stdout=subprocess.PIPE, 395 | stderr=(subprocess.PIPE if hide_stderr 396 | else None)) 397 | break 398 | except EnvironmentError: 399 | e = sys.exc_info()[1] 400 | if e.errno == errno.ENOENT: 401 | continue 402 | if verbose: 403 | print("unable to run %s" % dispcmd) 404 | print(e) 405 | return None, None 406 | else: 407 | if verbose: 408 | print("unable to find command, tried %s" % (commands,)) 409 | return None, None 410 | stdout = p.communicate()[0].strip() 411 | if sys.version_info[0] >= 3: 412 | stdout = stdout.decode() 413 | if p.returncode != 0: 414 | if verbose: 415 | print("unable to run %s (error)" % dispcmd) 416 | print("stdout was %s" % stdout) 417 | return None, p.returncode 418 | return stdout, p.returncode 419 | 420 | 421 | LONG_VERSION_PY['git'] = ''' 422 | # This file helps to compute a version number in source trees obtained from 423 | # git-archive tarball (such as those provided by githubs download-from-tag 424 | # feature). Distribution tarballs (built by setup.py sdist) and build 425 | # directories (produced by setup.py build) will contain a much shorter file 426 | # that just contains the computed version number. 427 | 428 | # This file is released into the public domain. 
Generated by 429 | # versioneer-0.18 (https://github.com/warner/python-versioneer) 430 | 431 | """Git implementation of _version.py.""" 432 | 433 | import errno 434 | import os 435 | import re 436 | import subprocess 437 | import sys 438 | 439 | 440 | def get_keywords(): 441 | """Get the keywords needed to look up the version information.""" 442 | # these strings will be replaced by git during git-archive. 443 | # setup.py/versioneer.py will grep for the variable names, so they must 444 | # each be defined on a line of their own. _version.py will just call 445 | # get_keywords(). 446 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" 447 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" 448 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" 449 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 450 | return keywords 451 | 452 | 453 | class VersioneerConfig: 454 | """Container for Versioneer configuration parameters.""" 455 | 456 | 457 | def get_config(): 458 | """Create, populate and return the VersioneerConfig() object.""" 459 | # these strings are filled in when 'setup.py versioneer' creates 460 | # _version.py 461 | cfg = VersioneerConfig() 462 | cfg.VCS = "git" 463 | cfg.style = "%(STYLE)s" 464 | cfg.tag_prefix = "%(TAG_PREFIX)s" 465 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" 466 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" 467 | cfg.verbose = False 468 | return cfg 469 | 470 | 471 | class NotThisMethod(Exception): 472 | """Exception raised if a method is not valid for the current scenario.""" 473 | 474 | 475 | LONG_VERSION_PY = {} 476 | HANDLERS = {} 477 | 478 | 479 | def register_vcs_handler(vcs, method): # decorator 480 | """Decorator to mark a method as the handler for a particular VCS.""" 481 | def decorate(f): 482 | """Store f in HANDLERS[vcs][method].""" 483 | if vcs not in HANDLERS: 484 | HANDLERS[vcs] = {} 485 | HANDLERS[vcs][method] = f 486 | return f 487 | return decorate 488 | 489 | 490 | def run_command(commands, args, 
cwd=None, verbose=False, hide_stderr=False, 491 | env=None): 492 | """Call the given command(s).""" 493 | assert isinstance(commands, list) 494 | p = None 495 | for c in commands: 496 | try: 497 | dispcmd = str([c] + args) 498 | # remember shell=False, so use git.cmd on windows, not just git 499 | p = subprocess.Popen([c] + args, cwd=cwd, env=env, 500 | stdout=subprocess.PIPE, 501 | stderr=(subprocess.PIPE if hide_stderr 502 | else None)) 503 | break 504 | except EnvironmentError: 505 | e = sys.exc_info()[1] 506 | if e.errno == errno.ENOENT: 507 | continue 508 | if verbose: 509 | print("unable to run %%s" %% dispcmd) 510 | print(e) 511 | return None, None 512 | else: 513 | if verbose: 514 | print("unable to find command, tried %%s" %% (commands,)) 515 | return None, None 516 | stdout = p.communicate()[0].strip() 517 | if sys.version_info[0] >= 3: 518 | stdout = stdout.decode() 519 | if p.returncode != 0: 520 | if verbose: 521 | print("unable to run %%s (error)" %% dispcmd) 522 | print("stdout was %%s" %% stdout) 523 | return None, p.returncode 524 | return stdout, p.returncode 525 | 526 | 527 | def versions_from_parentdir(parentdir_prefix, root, verbose): 528 | """Try to determine the version from the parent directory name. 529 | 530 | Source tarballs conventionally unpack into a directory that includes both 531 | the project name and a version string. 
We will also support searching up 532 | two directory levels for an appropriately named parent directory 533 | """ 534 | rootdirs = [] 535 | 536 | for i in range(3): 537 | dirname = os.path.basename(root) 538 | if dirname.startswith(parentdir_prefix): 539 | return {"version": dirname[len(parentdir_prefix):], 540 | "full-revisionid": None, 541 | "dirty": False, "error": None, "date": None} 542 | else: 543 | rootdirs.append(root) 544 | root = os.path.dirname(root) # up a level 545 | 546 | if verbose: 547 | print("Tried directories %%s but none started with prefix %%s" %% 548 | (str(rootdirs), parentdir_prefix)) 549 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 550 | 551 | 552 | @register_vcs_handler("git", "get_keywords") 553 | def git_get_keywords(versionfile_abs): 554 | """Extract version information from the given file.""" 555 | # the code embedded in _version.py can just fetch the value of these 556 | # keywords. When used from setup.py, we don't want to import _version.py, 557 | # so we do it with a regexp instead. This function is not used from 558 | # _version.py. 
559 | keywords = {} 560 | try: 561 | f = open(versionfile_abs, "r") 562 | for line in f.readlines(): 563 | if line.strip().startswith("git_refnames ="): 564 | mo = re.search(r'=\s*"(.*)"', line) 565 | if mo: 566 | keywords["refnames"] = mo.group(1) 567 | if line.strip().startswith("git_full ="): 568 | mo = re.search(r'=\s*"(.*)"', line) 569 | if mo: 570 | keywords["full"] = mo.group(1) 571 | if line.strip().startswith("git_date ="): 572 | mo = re.search(r'=\s*"(.*)"', line) 573 | if mo: 574 | keywords["date"] = mo.group(1) 575 | f.close() 576 | except EnvironmentError: 577 | pass 578 | return keywords 579 | 580 | 581 | @register_vcs_handler("git", "keywords") 582 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 583 | """Get version information from git keywords.""" 584 | if not keywords: 585 | raise NotThisMethod("no keywords at all, weird") 586 | date = keywords.get("date") 587 | if date is not None: 588 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant 589 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 590 | # -like" string, which we must then edit to make compliant), because 591 | # it's been around since git-1.5.3, and it's too difficult to 592 | # discover which version we're using, or to work around using an 593 | # older one. 594 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 595 | refnames = keywords["refnames"].strip() 596 | if refnames.startswith("$Format"): 597 | if verbose: 598 | print("keywords are unexpanded, not using") 599 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 600 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 601 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 602 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 603 | TAG = "tag: " 604 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 605 | if not tags: 606 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 607 | # a heuristic: assume all version tags have a digit. The old git %%d 608 | # expansion behaves like git log --decorate=short and strips out the 609 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 610 | # between branches and tags. By ignoring refnames without digits, we 611 | # filter out many common branch names like "release" and 612 | # "stabilization", as well as "HEAD" and "master". 613 | tags = set([r for r in refs if re.search(r'\d', r)]) 614 | if verbose: 615 | print("discarding '%%s', no digits" %% ",".join(refs - tags)) 616 | if verbose: 617 | print("likely tags: %%s" %% ",".join(sorted(tags))) 618 | for ref in sorted(tags): 619 | # sorting will prefer e.g. "2.0" over "2.0rc1" 620 | if ref.startswith(tag_prefix): 621 | r = ref[len(tag_prefix):] 622 | if verbose: 623 | print("picking %%s" %% r) 624 | return {"version": r, 625 | "full-revisionid": keywords["full"].strip(), 626 | "dirty": False, "error": None, 627 | "date": date} 628 | # no suitable tags, so version is "0+unknown", but full hex is still there 629 | if verbose: 630 | print("no suitable tags, using unknown + full revision id") 631 | return {"version": "0+unknown", 632 | "full-revisionid": keywords["full"].strip(), 633 | "dirty": False, "error": "no suitable tags", "date": None} 634 | 635 | 636 | @register_vcs_handler("git", "pieces_from_vcs") 637 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 638 | """Get version from 'git describe' in the root of the source tree. 639 | 640 | This only gets called if the git-archive 'subst' keywords were *not* 641 | expanded, and _version.py hasn't already been rewritten with a short 642 | version string, meaning we're inside a checked out source tree. 
643 | """ 644 | GITS = ["git"] 645 | if sys.platform == "win32": 646 | GITS = ["git.cmd", "git.exe"] 647 | 648 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 649 | hide_stderr=True) 650 | if rc != 0: 651 | if verbose: 652 | print("Directory %%s not under git control" %% root) 653 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 654 | 655 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 656 | # if there isn't one, this yields HEX[-dirty] (no NUM) 657 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 658 | "--always", "--long", 659 | "--match", "%%s*" %% tag_prefix], 660 | cwd=root) 661 | # --long was added in git-1.5.5 662 | if describe_out is None: 663 | raise NotThisMethod("'git describe' failed") 664 | describe_out = describe_out.strip() 665 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 666 | if full_out is None: 667 | raise NotThisMethod("'git rev-parse' failed") 668 | full_out = full_out.strip() 669 | 670 | pieces = {} 671 | pieces["long"] = full_out 672 | pieces["short"] = full_out[:7] # maybe improved later 673 | pieces["error"] = None 674 | 675 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 676 | # TAG might have hyphens. 677 | git_describe = describe_out 678 | 679 | # look for -dirty suffix 680 | dirty = git_describe.endswith("-dirty") 681 | pieces["dirty"] = dirty 682 | if dirty: 683 | git_describe = git_describe[:git_describe.rindex("-dirty")] 684 | 685 | # now we have TAG-NUM-gHEX or HEX 686 | 687 | if "-" in git_describe: 688 | # TAG-NUM-gHEX 689 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 690 | if not mo: 691 | # unparseable. Maybe git-describe is misbehaving? 
692 | pieces["error"] = ("unable to parse git-describe output: '%%s'" 693 | %% describe_out) 694 | return pieces 695 | 696 | # tag 697 | full_tag = mo.group(1) 698 | if not full_tag.startswith(tag_prefix): 699 | if verbose: 700 | fmt = "tag '%%s' doesn't start with prefix '%%s'" 701 | print(fmt %% (full_tag, tag_prefix)) 702 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" 703 | %% (full_tag, tag_prefix)) 704 | return pieces 705 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 706 | 707 | # distance: number of commits since tag 708 | pieces["distance"] = int(mo.group(2)) 709 | 710 | # commit: short hex revision ID 711 | pieces["short"] = mo.group(3) 712 | 713 | else: 714 | # HEX: no tags 715 | pieces["closest-tag"] = None 716 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 717 | cwd=root) 718 | pieces["distance"] = int(count_out) # total number of commits 719 | 720 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 721 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"], 722 | cwd=root)[0].strip() 723 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 724 | 725 | return pieces 726 | 727 | 728 | def plus_or_dot(pieces): 729 | """Return a + if we don't already have one, else return a .""" 730 | if "+" in pieces.get("closest-tag", ""): 731 | return "." 732 | return "+" 733 | 734 | 735 | def render_pep440(pieces): 736 | """Build up version string, with post-release "local version identifier". 737 | 738 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 739 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 740 | 741 | Exceptions: 742 | 1: no tags. git_describe was just HEX. 
0+untagged.DISTANCE.gHEX[.dirty] 743 | """ 744 | if pieces["closest-tag"]: 745 | rendered = pieces["closest-tag"] 746 | if pieces["distance"] or pieces["dirty"]: 747 | rendered += plus_or_dot(pieces) 748 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) 749 | if pieces["dirty"]: 750 | rendered += ".dirty" 751 | else: 752 | # exception #1 753 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], 754 | pieces["short"]) 755 | if pieces["dirty"]: 756 | rendered += ".dirty" 757 | return rendered 758 | 759 | 760 | def render_pep440_pre(pieces): 761 | """TAG[.post.devDISTANCE] -- No -dirty. 762 | 763 | Exceptions: 764 | 1: no tags. 0.post.devDISTANCE 765 | """ 766 | if pieces["closest-tag"]: 767 | rendered = pieces["closest-tag"] 768 | if pieces["distance"]: 769 | rendered += ".post.dev%%d" %% pieces["distance"] 770 | else: 771 | # exception #1 772 | rendered = "0.post.dev%%d" %% pieces["distance"] 773 | return rendered 774 | 775 | 776 | def render_pep440_post(pieces): 777 | """TAG[.postDISTANCE[.dev0]+gHEX] . 778 | 779 | The ".dev0" means dirty. Note that .dev0 sorts backwards 780 | (a dirty tree will appear "older" than the corresponding clean one), 781 | but you shouldn't be releasing software with -dirty anyways. 782 | 783 | Exceptions: 784 | 1: no tags. 0.postDISTANCE[.dev0] 785 | """ 786 | if pieces["closest-tag"]: 787 | rendered = pieces["closest-tag"] 788 | if pieces["distance"] or pieces["dirty"]: 789 | rendered += ".post%%d" %% pieces["distance"] 790 | if pieces["dirty"]: 791 | rendered += ".dev0" 792 | rendered += plus_or_dot(pieces) 793 | rendered += "g%%s" %% pieces["short"] 794 | else: 795 | # exception #1 796 | rendered = "0.post%%d" %% pieces["distance"] 797 | if pieces["dirty"]: 798 | rendered += ".dev0" 799 | rendered += "+g%%s" %% pieces["short"] 800 | return rendered 801 | 802 | 803 | def render_pep440_old(pieces): 804 | """TAG[.postDISTANCE[.dev0]] . 805 | 806 | The ".dev0" means dirty. 807 | 808 | Eexceptions: 809 | 1: no tags. 
0.postDISTANCE[.dev0] 810 | """ 811 | if pieces["closest-tag"]: 812 | rendered = pieces["closest-tag"] 813 | if pieces["distance"] or pieces["dirty"]: 814 | rendered += ".post%%d" %% pieces["distance"] 815 | if pieces["dirty"]: 816 | rendered += ".dev0" 817 | else: 818 | # exception #1 819 | rendered = "0.post%%d" %% pieces["distance"] 820 | if pieces["dirty"]: 821 | rendered += ".dev0" 822 | return rendered 823 | 824 | 825 | def render_git_describe(pieces): 826 | """TAG[-DISTANCE-gHEX][-dirty]. 827 | 828 | Like 'git describe --tags --dirty --always'. 829 | 830 | Exceptions: 831 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 832 | """ 833 | if pieces["closest-tag"]: 834 | rendered = pieces["closest-tag"] 835 | if pieces["distance"]: 836 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 837 | else: 838 | # exception #1 839 | rendered = pieces["short"] 840 | if pieces["dirty"]: 841 | rendered += "-dirty" 842 | return rendered 843 | 844 | 845 | def render_git_describe_long(pieces): 846 | """TAG-DISTANCE-gHEX[-dirty]. 847 | 848 | Like 'git describe --tags --dirty --always -long'. 849 | The distance/hash is unconditional. 850 | 851 | Exceptions: 852 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 853 | """ 854 | if pieces["closest-tag"]: 855 | rendered = pieces["closest-tag"] 856 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) 857 | else: 858 | # exception #1 859 | rendered = pieces["short"] 860 | if pieces["dirty"]: 861 | rendered += "-dirty" 862 | return rendered 863 | 864 | 865 | def render(pieces, style): 866 | """Render the given version pieces into the requested style.""" 867 | if pieces["error"]: 868 | return {"version": "unknown", 869 | "full-revisionid": pieces.get("long"), 870 | "dirty": None, 871 | "error": pieces["error"], 872 | "date": None} 873 | 874 | if not style or style == "default": 875 | style = "pep440" # the default 876 | 877 | if style == "pep440": 878 | rendered = render_pep440(pieces) 879 | elif style == "pep440-pre": 880 | rendered = render_pep440_pre(pieces) 881 | elif style == "pep440-post": 882 | rendered = render_pep440_post(pieces) 883 | elif style == "pep440-old": 884 | rendered = render_pep440_old(pieces) 885 | elif style == "git-describe": 886 | rendered = render_git_describe(pieces) 887 | elif style == "git-describe-long": 888 | rendered = render_git_describe_long(pieces) 889 | else: 890 | raise ValueError("unknown style '%%s'" %% style) 891 | 892 | return {"version": rendered, "full-revisionid": pieces["long"], 893 | "dirty": pieces["dirty"], "error": None, 894 | "date": pieces.get("date")} 895 | 896 | 897 | def get_versions(): 898 | """Get version information or return default if unable to do so.""" 899 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 900 | # __file__, we can work backwards from there to the root. Some 901 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 902 | # case we can only use expanded keywords. 
903 | 904 | cfg = get_config() 905 | verbose = cfg.verbose 906 | 907 | try: 908 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, 909 | verbose) 910 | except NotThisMethod: 911 | pass 912 | 913 | try: 914 | root = os.path.realpath(__file__) 915 | # versionfile_source is the relative path from the top of the source 916 | # tree (where the .git directory might live) to this file. Invert 917 | # this to find the root from __file__. 918 | for i in cfg.versionfile_source.split('/'): 919 | root = os.path.dirname(root) 920 | except NameError: 921 | return {"version": "0+unknown", "full-revisionid": None, 922 | "dirty": None, 923 | "error": "unable to find root of source tree", 924 | "date": None} 925 | 926 | try: 927 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 928 | return render(pieces, cfg.style) 929 | except NotThisMethod: 930 | pass 931 | 932 | try: 933 | if cfg.parentdir_prefix: 934 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 935 | except NotThisMethod: 936 | pass 937 | 938 | return {"version": "0+unknown", "full-revisionid": None, 939 | "dirty": None, 940 | "error": "unable to compute version", "date": None} 941 | ''' 942 | 943 | 944 | @register_vcs_handler("git", "get_keywords") 945 | def git_get_keywords(versionfile_abs): 946 | """Extract version information from the given file.""" 947 | # the code embedded in _version.py can just fetch the value of these 948 | # keywords. When used from setup.py, we don't want to import _version.py, 949 | # so we do it with a regexp instead. This function is not used from 950 | # _version.py. 
951 | keywords = {} 952 | try: 953 | f = open(versionfile_abs, "r") 954 | for line in f.readlines(): 955 | if line.strip().startswith("git_refnames ="): 956 | mo = re.search(r'=\s*"(.*)"', line) 957 | if mo: 958 | keywords["refnames"] = mo.group(1) 959 | if line.strip().startswith("git_full ="): 960 | mo = re.search(r'=\s*"(.*)"', line) 961 | if mo: 962 | keywords["full"] = mo.group(1) 963 | if line.strip().startswith("git_date ="): 964 | mo = re.search(r'=\s*"(.*)"', line) 965 | if mo: 966 | keywords["date"] = mo.group(1) 967 | f.close() 968 | except EnvironmentError: 969 | pass 970 | return keywords 971 | 972 | 973 | @register_vcs_handler("git", "keywords") 974 | def git_versions_from_keywords(keywords, tag_prefix, verbose): 975 | """Get version information from git keywords.""" 976 | if not keywords: 977 | raise NotThisMethod("no keywords at all, weird") 978 | date = keywords.get("date") 979 | if date is not None: 980 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 981 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 982 | # -like" string, which we must then edit to make compliant), because 983 | # it's been around since git-1.5.3, and it's too difficult to 984 | # discover which version we're using, or to work around using an 985 | # older one. 986 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 987 | refnames = keywords["refnames"].strip() 988 | if refnames.startswith("$Format"): 989 | if verbose: 990 | print("keywords are unexpanded, not using") 991 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 992 | refs = set([r.strip() for r in refnames.strip("()").split(",")]) 993 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 994 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 995 | TAG = "tag: " 996 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) 997 | if not tags: 998 | # Either we're using git < 1.8.3, or there really are no tags. 
We use 999 | # a heuristic: assume all version tags have a digit. The old git %d 1000 | # expansion behaves like git log --decorate=short and strips out the 1001 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 1002 | # between branches and tags. By ignoring refnames without digits, we 1003 | # filter out many common branch names like "release" and 1004 | # "stabilization", as well as "HEAD" and "master". 1005 | tags = set([r for r in refs if re.search(r'\d', r)]) 1006 | if verbose: 1007 | print("discarding '%s', no digits" % ",".join(refs - tags)) 1008 | if verbose: 1009 | print("likely tags: %s" % ",".join(sorted(tags))) 1010 | for ref in sorted(tags): 1011 | # sorting will prefer e.g. "2.0" over "2.0rc1" 1012 | if ref.startswith(tag_prefix): 1013 | r = ref[len(tag_prefix):] 1014 | if verbose: 1015 | print("picking %s" % r) 1016 | return {"version": r, 1017 | "full-revisionid": keywords["full"].strip(), 1018 | "dirty": False, "error": None, 1019 | "date": date} 1020 | # no suitable tags, so version is "0+unknown", but full hex is still there 1021 | if verbose: 1022 | print("no suitable tags, using unknown + full revision id") 1023 | return {"version": "0+unknown", 1024 | "full-revisionid": keywords["full"].strip(), 1025 | "dirty": False, "error": "no suitable tags", "date": None} 1026 | 1027 | 1028 | @register_vcs_handler("git", "pieces_from_vcs") 1029 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): 1030 | """Get version from 'git describe' in the root of the source tree. 1031 | 1032 | This only gets called if the git-archive 'subst' keywords were *not* 1033 | expanded, and _version.py hasn't already been rewritten with a short 1034 | version string, meaning we're inside a checked out source tree. 
1035 | """ 1036 | GITS = ["git"] 1037 | if sys.platform == "win32": 1038 | GITS = ["git.cmd", "git.exe"] 1039 | 1040 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 1041 | hide_stderr=True) 1042 | if rc != 0: 1043 | if verbose: 1044 | print("Directory %s not under git control" % root) 1045 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 1046 | 1047 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 1048 | # if there isn't one, this yields HEX[-dirty] (no NUM) 1049 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 1050 | "--always", "--long", 1051 | "--match", "%s*" % tag_prefix], 1052 | cwd=root) 1053 | # --long was added in git-1.5.5 1054 | if describe_out is None: 1055 | raise NotThisMethod("'git describe' failed") 1056 | describe_out = describe_out.strip() 1057 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 1058 | if full_out is None: 1059 | raise NotThisMethod("'git rev-parse' failed") 1060 | full_out = full_out.strip() 1061 | 1062 | pieces = {} 1063 | pieces["long"] = full_out 1064 | pieces["short"] = full_out[:7] # maybe improved later 1065 | pieces["error"] = None 1066 | 1067 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 1068 | # TAG might have hyphens. 1069 | git_describe = describe_out 1070 | 1071 | # look for -dirty suffix 1072 | dirty = git_describe.endswith("-dirty") 1073 | pieces["dirty"] = dirty 1074 | if dirty: 1075 | git_describe = git_describe[:git_describe.rindex("-dirty")] 1076 | 1077 | # now we have TAG-NUM-gHEX or HEX 1078 | 1079 | if "-" in git_describe: 1080 | # TAG-NUM-gHEX 1081 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 1082 | if not mo: 1083 | # unparseable. Maybe git-describe is misbehaving? 
1084 | pieces["error"] = ("unable to parse git-describe output: '%s'" 1085 | % describe_out) 1086 | return pieces 1087 | 1088 | # tag 1089 | full_tag = mo.group(1) 1090 | if not full_tag.startswith(tag_prefix): 1091 | if verbose: 1092 | fmt = "tag '%s' doesn't start with prefix '%s'" 1093 | print(fmt % (full_tag, tag_prefix)) 1094 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" 1095 | % (full_tag, tag_prefix)) 1096 | return pieces 1097 | pieces["closest-tag"] = full_tag[len(tag_prefix):] 1098 | 1099 | # distance: number of commits since tag 1100 | pieces["distance"] = int(mo.group(2)) 1101 | 1102 | # commit: short hex revision ID 1103 | pieces["short"] = mo.group(3) 1104 | 1105 | else: 1106 | # HEX: no tags 1107 | pieces["closest-tag"] = None 1108 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], 1109 | cwd=root) 1110 | pieces["distance"] = int(count_out) # total number of commits 1111 | 1112 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 1113 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], 1114 | cwd=root)[0].strip() 1115 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 1116 | 1117 | return pieces 1118 | 1119 | 1120 | def do_vcs_install(manifest_in, versionfile_source, ipy): 1121 | """Git-specific installation logic for Versioneer. 1122 | 1123 | For Git, this means creating/changing .gitattributes to mark _version.py 1124 | for export-subst keyword substitution. 
1125 | """ 1126 | GITS = ["git"] 1127 | if sys.platform == "win32": 1128 | GITS = ["git.cmd", "git.exe"] 1129 | files = [manifest_in, versionfile_source] 1130 | if ipy: 1131 | files.append(ipy) 1132 | try: 1133 | me = __file__ 1134 | if me.endswith(".pyc") or me.endswith(".pyo"): 1135 | me = os.path.splitext(me)[0] + ".py" 1136 | versioneer_file = os.path.relpath(me) 1137 | except NameError: 1138 | versioneer_file = "versioneer.py" 1139 | files.append(versioneer_file) 1140 | present = False 1141 | try: 1142 | f = open(".gitattributes", "r") 1143 | for line in f.readlines(): 1144 | if line.strip().startswith(versionfile_source): 1145 | if "export-subst" in line.strip().split()[1:]: 1146 | present = True 1147 | f.close() 1148 | except EnvironmentError: 1149 | pass 1150 | if not present: 1151 | f = open(".gitattributes", "a+") 1152 | f.write("%s export-subst\n" % versionfile_source) 1153 | f.close() 1154 | files.append(".gitattributes") 1155 | run_command(GITS, ["add", "--"] + files) 1156 | 1157 | 1158 | def versions_from_parentdir(parentdir_prefix, root, verbose): 1159 | """Try to determine the version from the parent directory name. 1160 | 1161 | Source tarballs conventionally unpack into a directory that includes both 1162 | the project name and a version string. 
We will also support searching up 1163 | two directory levels for an appropriately named parent directory 1164 | """ 1165 | rootdirs = [] 1166 | 1167 | for i in range(3): 1168 | dirname = os.path.basename(root) 1169 | if dirname.startswith(parentdir_prefix): 1170 | return {"version": dirname[len(parentdir_prefix):], 1171 | "full-revisionid": None, 1172 | "dirty": False, "error": None, "date": None} 1173 | else: 1174 | rootdirs.append(root) 1175 | root = os.path.dirname(root) # up a level 1176 | 1177 | if verbose: 1178 | print("Tried directories %s but none started with prefix %s" % 1179 | (str(rootdirs), parentdir_prefix)) 1180 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 1181 | 1182 | 1183 | SHORT_VERSION_PY = """ 1184 | # This file was generated by 'versioneer.py' (0.18) from 1185 | # revision-control system data, or from the parent directory name of an 1186 | # unpacked source archive. Distribution tarballs contain a pre-generated copy 1187 | # of this file. 1188 | 1189 | import json 1190 | 1191 | version_json = ''' 1192 | %s 1193 | ''' # END VERSION_JSON 1194 | 1195 | 1196 | def get_versions(): 1197 | return json.loads(version_json) 1198 | """ 1199 | 1200 | 1201 | def versions_from_file(filename): 1202 | """Try to determine the version from _version.py if present.""" 1203 | try: 1204 | with open(filename) as f: 1205 | contents = f.read() 1206 | except EnvironmentError: 1207 | raise NotThisMethod("unable to read _version.py") 1208 | mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", 1209 | contents, re.M | re.S) 1210 | if not mo: 1211 | mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", 1212 | contents, re.M | re.S) 1213 | if not mo: 1214 | raise NotThisMethod("no version_json in _version.py") 1215 | return json.loads(mo.group(1)) 1216 | 1217 | 1218 | def write_to_version_file(filename, versions): 1219 | """Write the given version number to the given _version.py file.""" 1220 | os.unlink(filename) 1221 | 
contents = json.dumps(versions, sort_keys=True, 1222 | indent=1, separators=(",", ": ")) 1223 | with open(filename, "w") as f: 1224 | f.write(SHORT_VERSION_PY % contents) 1225 | 1226 | print("set %s to '%s'" % (filename, versions["version"])) 1227 | 1228 | 1229 | def plus_or_dot(pieces): 1230 | """Return a + if we don't already have one, else return a .""" 1231 | if "+" in pieces.get("closest-tag", ""): 1232 | return "." 1233 | return "+" 1234 | 1235 | 1236 | def render_pep440(pieces): 1237 | """Build up version string, with post-release "local version identifier". 1238 | 1239 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 1240 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 1241 | 1242 | Exceptions: 1243 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 1244 | """ 1245 | if pieces["closest-tag"]: 1246 | rendered = pieces["closest-tag"] 1247 | if pieces["distance"] or pieces["dirty"]: 1248 | rendered += plus_or_dot(pieces) 1249 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 1250 | if pieces["dirty"]: 1251 | rendered += ".dirty" 1252 | else: 1253 | # exception #1 1254 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], 1255 | pieces["short"]) 1256 | if pieces["dirty"]: 1257 | rendered += ".dirty" 1258 | return rendered 1259 | 1260 | 1261 | def render_pep440_pre(pieces): 1262 | """TAG[.post.devDISTANCE] -- No -dirty. 1263 | 1264 | Exceptions: 1265 | 1: no tags. 0.post.devDISTANCE 1266 | """ 1267 | if pieces["closest-tag"]: 1268 | rendered = pieces["closest-tag"] 1269 | if pieces["distance"]: 1270 | rendered += ".post.dev%d" % pieces["distance"] 1271 | else: 1272 | # exception #1 1273 | rendered = "0.post.dev%d" % pieces["distance"] 1274 | return rendered 1275 | 1276 | 1277 | def render_pep440_post(pieces): 1278 | """TAG[.postDISTANCE[.dev0]+gHEX] . 1279 | 1280 | The ".dev0" means dirty. 
Note that .dev0 sorts backwards 1281 | (a dirty tree will appear "older" than the corresponding clean one), 1282 | but you shouldn't be releasing software with -dirty anyways. 1283 | 1284 | Exceptions: 1285 | 1: no tags. 0.postDISTANCE[.dev0] 1286 | """ 1287 | if pieces["closest-tag"]: 1288 | rendered = pieces["closest-tag"] 1289 | if pieces["distance"] or pieces["dirty"]: 1290 | rendered += ".post%d" % pieces["distance"] 1291 | if pieces["dirty"]: 1292 | rendered += ".dev0" 1293 | rendered += plus_or_dot(pieces) 1294 | rendered += "g%s" % pieces["short"] 1295 | else: 1296 | # exception #1 1297 | rendered = "0.post%d" % pieces["distance"] 1298 | if pieces["dirty"]: 1299 | rendered += ".dev0" 1300 | rendered += "+g%s" % pieces["short"] 1301 | return rendered 1302 | 1303 | 1304 | def render_pep440_old(pieces): 1305 | """TAG[.postDISTANCE[.dev0]] . 1306 | 1307 | The ".dev0" means dirty. 1308 | 1309 | Eexceptions: 1310 | 1: no tags. 0.postDISTANCE[.dev0] 1311 | """ 1312 | if pieces["closest-tag"]: 1313 | rendered = pieces["closest-tag"] 1314 | if pieces["distance"] or pieces["dirty"]: 1315 | rendered += ".post%d" % pieces["distance"] 1316 | if pieces["dirty"]: 1317 | rendered += ".dev0" 1318 | else: 1319 | # exception #1 1320 | rendered = "0.post%d" % pieces["distance"] 1321 | if pieces["dirty"]: 1322 | rendered += ".dev0" 1323 | return rendered 1324 | 1325 | 1326 | def render_git_describe(pieces): 1327 | """TAG[-DISTANCE-gHEX][-dirty]. 1328 | 1329 | Like 'git describe --tags --dirty --always'. 1330 | 1331 | Exceptions: 1332 | 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) 1333 | """ 1334 | if pieces["closest-tag"]: 1335 | rendered = pieces["closest-tag"] 1336 | if pieces["distance"]: 1337 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1338 | else: 1339 | # exception #1 1340 | rendered = pieces["short"] 1341 | if pieces["dirty"]: 1342 | rendered += "-dirty" 1343 | return rendered 1344 | 1345 | 1346 | def render_git_describe_long(pieces): 1347 | """TAG-DISTANCE-gHEX[-dirty]. 1348 | 1349 | Like 'git describe --tags --dirty --always -long'. 1350 | The distance/hash is unconditional. 1351 | 1352 | Exceptions: 1353 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 1354 | """ 1355 | if pieces["closest-tag"]: 1356 | rendered = pieces["closest-tag"] 1357 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 1358 | else: 1359 | # exception #1 1360 | rendered = pieces["short"] 1361 | if pieces["dirty"]: 1362 | rendered += "-dirty" 1363 | return rendered 1364 | 1365 | 1366 | def render(pieces, style): 1367 | """Render the given version pieces into the requested style.""" 1368 | if pieces["error"]: 1369 | return {"version": "unknown", 1370 | "full-revisionid": pieces.get("long"), 1371 | "dirty": None, 1372 | "error": pieces["error"], 1373 | "date": None} 1374 | 1375 | if not style or style == "default": 1376 | style = "pep440" # the default 1377 | 1378 | if style == "pep440": 1379 | rendered = render_pep440(pieces) 1380 | elif style == "pep440-pre": 1381 | rendered = render_pep440_pre(pieces) 1382 | elif style == "pep440-post": 1383 | rendered = render_pep440_post(pieces) 1384 | elif style == "pep440-old": 1385 | rendered = render_pep440_old(pieces) 1386 | elif style == "git-describe": 1387 | rendered = render_git_describe(pieces) 1388 | elif style == "git-describe-long": 1389 | rendered = render_git_describe_long(pieces) 1390 | else: 1391 | raise ValueError("unknown style '%s'" % style) 1392 | 1393 | return {"version": rendered, "full-revisionid": pieces["long"], 1394 | "dirty": 
pieces["dirty"], "error": None, 1395 | "date": pieces.get("date")} 1396 | 1397 | 1398 | class VersioneerBadRootError(Exception): 1399 | """The project root directory is unknown or missing key files.""" 1400 | 1401 | 1402 | def get_versions(verbose=False): 1403 | """Get the project version from whatever source is available. 1404 | 1405 | Returns dict with two keys: 'version' and 'full'. 1406 | """ 1407 | if "versioneer" in sys.modules: 1408 | # see the discussion in cmdclass.py:get_cmdclass() 1409 | del sys.modules["versioneer"] 1410 | 1411 | root = get_root() 1412 | cfg = get_config_from_root(root) 1413 | 1414 | assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" 1415 | handlers = HANDLERS.get(cfg.VCS) 1416 | assert handlers, "unrecognized VCS '%s'" % cfg.VCS 1417 | verbose = verbose or cfg.verbose 1418 | assert cfg.versionfile_source is not None, \ 1419 | "please set versioneer.versionfile_source" 1420 | assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" 1421 | 1422 | versionfile_abs = os.path.join(root, cfg.versionfile_source) 1423 | 1424 | # extract version from first of: _version.py, VCS command (e.g. 'git 1425 | # describe'), parentdir. This is meant to work for developers using a 1426 | # source checkout, for users of a tarball created by 'setup.py sdist', 1427 | # and for users of a tarball/zipball created by 'git archive' or github's 1428 | # download-from-tag feature or the equivalent in other VCSes. 
1429 | 1430 | get_keywords_f = handlers.get("get_keywords") 1431 | from_keywords_f = handlers.get("keywords") 1432 | if get_keywords_f and from_keywords_f: 1433 | try: 1434 | keywords = get_keywords_f(versionfile_abs) 1435 | ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) 1436 | if verbose: 1437 | print("got version from expanded keyword %s" % ver) 1438 | return ver 1439 | except NotThisMethod: 1440 | pass 1441 | 1442 | try: 1443 | ver = versions_from_file(versionfile_abs) 1444 | if verbose: 1445 | print("got version from file %s %s" % (versionfile_abs, ver)) 1446 | return ver 1447 | except NotThisMethod: 1448 | pass 1449 | 1450 | from_vcs_f = handlers.get("pieces_from_vcs") 1451 | if from_vcs_f: 1452 | try: 1453 | pieces = from_vcs_f(cfg.tag_prefix, root, verbose) 1454 | ver = render(pieces, cfg.style) 1455 | if verbose: 1456 | print("got version from VCS %s" % ver) 1457 | return ver 1458 | except NotThisMethod: 1459 | pass 1460 | 1461 | try: 1462 | if cfg.parentdir_prefix: 1463 | ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 1464 | if verbose: 1465 | print("got version from parentdir %s" % ver) 1466 | return ver 1467 | except NotThisMethod: 1468 | pass 1469 | 1470 | if verbose: 1471 | print("unable to compute version") 1472 | 1473 | return {"version": "0+unknown", "full-revisionid": None, 1474 | "dirty": None, "error": "unable to compute version", 1475 | "date": None} 1476 | 1477 | 1478 | def get_version(): 1479 | """Get the short version string for this project.""" 1480 | return get_versions()["version"] 1481 | 1482 | 1483 | def get_cmdclass(): 1484 | """Get the custom setuptools/distutils subclasses used by Versioneer.""" 1485 | if "versioneer" in sys.modules: 1486 | del sys.modules["versioneer"] 1487 | # this fixes the "python setup.py develop" case (also 'install' and 1488 | # 'easy_install .'), in which subdependencies of the main project are 1489 | # built (using setup.py bdist_egg) in the same python process. 
Assume 1490 | # a main project A and a dependency B, which use different versions 1491 | # of Versioneer. A's setup.py imports A's Versioneer, leaving it in 1492 | # sys.modules by the time B's setup.py is executed, causing B to run 1493 | # with the wrong versioneer. Setuptools wraps the sub-dep builds in a 1494 | # sandbox that restores sys.modules to it's pre-build state, so the 1495 | # parent is protected against the child's "import versioneer". By 1496 | # removing ourselves from sys.modules here, before the child build 1497 | # happens, we protect the child from the parent's versioneer too. 1498 | # Also see https://github.com/warner/python-versioneer/issues/52 1499 | 1500 | cmds = {} 1501 | 1502 | # we add "version" to both distutils and setuptools 1503 | from distutils.core import Command 1504 | 1505 | class cmd_version(Command): 1506 | description = "report generated version string" 1507 | user_options = [] 1508 | boolean_options = [] 1509 | 1510 | def initialize_options(self): 1511 | pass 1512 | 1513 | def finalize_options(self): 1514 | pass 1515 | 1516 | def run(self): 1517 | vers = get_versions(verbose=True) 1518 | print("Version: %s" % vers["version"]) 1519 | print(" full-revisionid: %s" % vers.get("full-revisionid")) 1520 | print(" dirty: %s" % vers.get("dirty")) 1521 | print(" date: %s" % vers.get("date")) 1522 | if vers["error"]: 1523 | print(" error: %s" % vers["error"]) 1524 | cmds["version"] = cmd_version 1525 | 1526 | # we override "build_py" in both distutils and setuptools 1527 | # 1528 | # most invocation pathways end up running build_py: 1529 | # distutils/build -> build_py 1530 | # distutils/install -> distutils/build ->.. 1531 | # setuptools/bdist_wheel -> distutils/install ->.. 1532 | # setuptools/bdist_egg -> distutils/install_lib -> build_py 1533 | # setuptools/install -> bdist_egg ->.. 1534 | # setuptools/develop -> ? 
1535 | # pip install: 1536 | # copies source tree to a tempdir before running egg_info/etc 1537 | # if .git isn't copied too, 'git describe' will fail 1538 | # then does setup.py bdist_wheel, or sometimes setup.py install 1539 | # setup.py egg_info -> ? 1540 | 1541 | # we override different "build_py" commands for both environments 1542 | if "setuptools" in sys.modules: 1543 | from setuptools.command.build_py import build_py as _build_py 1544 | else: 1545 | from distutils.command.build_py import build_py as _build_py 1546 | 1547 | class cmd_build_py(_build_py): 1548 | def run(self): 1549 | root = get_root() 1550 | cfg = get_config_from_root(root) 1551 | versions = get_versions() 1552 | _build_py.run(self) 1553 | # now locate _version.py in the new build/ directory and replace 1554 | # it with an updated value 1555 | if cfg.versionfile_build: 1556 | target_versionfile = os.path.join(self.build_lib, 1557 | cfg.versionfile_build) 1558 | print("UPDATING %s" % target_versionfile) 1559 | write_to_version_file(target_versionfile, versions) 1560 | cmds["build_py"] = cmd_build_py 1561 | 1562 | if "cx_Freeze" in sys.modules: # cx_freeze enabled? 1563 | from cx_Freeze.dist import build_exe as _build_exe 1564 | # nczeczulin reports that py2exe won't like the pep440-style string 1565 | # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 1566 | # setup(console=[{ 1567 | # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION 1568 | # "product_version": versioneer.get_version(), 1569 | # ... 
1570 | 1571 | class cmd_build_exe(_build_exe): 1572 | def run(self): 1573 | root = get_root() 1574 | cfg = get_config_from_root(root) 1575 | versions = get_versions() 1576 | target_versionfile = cfg.versionfile_source 1577 | print("UPDATING %s" % target_versionfile) 1578 | write_to_version_file(target_versionfile, versions) 1579 | 1580 | _build_exe.run(self) 1581 | os.unlink(target_versionfile) 1582 | with open(cfg.versionfile_source, "w") as f: 1583 | LONG = LONG_VERSION_PY[cfg.VCS] 1584 | f.write(LONG % 1585 | {"DOLLAR": "$", 1586 | "STYLE": cfg.style, 1587 | "TAG_PREFIX": cfg.tag_prefix, 1588 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1589 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1590 | }) 1591 | cmds["build_exe"] = cmd_build_exe 1592 | del cmds["build_py"] 1593 | 1594 | if 'py2exe' in sys.modules: # py2exe enabled? 1595 | try: 1596 | from py2exe.distutils_buildexe import py2exe as _py2exe # py3 1597 | except ImportError: 1598 | from py2exe.build_exe import py2exe as _py2exe # py2 1599 | 1600 | class cmd_py2exe(_py2exe): 1601 | def run(self): 1602 | root = get_root() 1603 | cfg = get_config_from_root(root) 1604 | versions = get_versions() 1605 | target_versionfile = cfg.versionfile_source 1606 | print("UPDATING %s" % target_versionfile) 1607 | write_to_version_file(target_versionfile, versions) 1608 | 1609 | _py2exe.run(self) 1610 | os.unlink(target_versionfile) 1611 | with open(cfg.versionfile_source, "w") as f: 1612 | LONG = LONG_VERSION_PY[cfg.VCS] 1613 | f.write(LONG % 1614 | {"DOLLAR": "$", 1615 | "STYLE": cfg.style, 1616 | "TAG_PREFIX": cfg.tag_prefix, 1617 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1618 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1619 | }) 1620 | cmds["py2exe"] = cmd_py2exe 1621 | 1622 | # we override different "sdist" commands for both environments 1623 | if "setuptools" in sys.modules: 1624 | from setuptools.command.sdist import sdist as _sdist 1625 | else: 1626 | from distutils.command.sdist import sdist as _sdist 1627 | 
1628 | class cmd_sdist(_sdist): 1629 | def run(self): 1630 | versions = get_versions() 1631 | self._versioneer_generated_versions = versions 1632 | # unless we update this, the command will keep using the old 1633 | # version 1634 | self.distribution.metadata.version = versions["version"] 1635 | return _sdist.run(self) 1636 | 1637 | def make_release_tree(self, base_dir, files): 1638 | root = get_root() 1639 | cfg = get_config_from_root(root) 1640 | _sdist.make_release_tree(self, base_dir, files) 1641 | # now locate _version.py in the new base_dir directory 1642 | # (remembering that it may be a hardlink) and replace it with an 1643 | # updated value 1644 | target_versionfile = os.path.join(base_dir, cfg.versionfile_source) 1645 | print("UPDATING %s" % target_versionfile) 1646 | write_to_version_file(target_versionfile, 1647 | self._versioneer_generated_versions) 1648 | cmds["sdist"] = cmd_sdist 1649 | 1650 | return cmds 1651 | 1652 | 1653 | CONFIG_ERROR = """ 1654 | setup.cfg is missing the necessary Versioneer configuration. You need 1655 | a section like: 1656 | 1657 | [versioneer] 1658 | VCS = git 1659 | style = pep440 1660 | versionfile_source = src/myproject/_version.py 1661 | versionfile_build = myproject/_version.py 1662 | tag_prefix = 1663 | parentdir_prefix = myproject- 1664 | 1665 | You will also need to edit your setup.py to use the results: 1666 | 1667 | import versioneer 1668 | setup(version=versioneer.get_version(), 1669 | cmdclass=versioneer.get_cmdclass(), ...) 1670 | 1671 | Please read the docstring in ./versioneer.py for configuration instructions, 1672 | edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. 1673 | """ 1674 | 1675 | SAMPLE_CONFIG = """ 1676 | # See the docstring in versioneer.py for instructions. Note that you must 1677 | # re-run 'versioneer.py setup' after changing this section, and commit the 1678 | # resulting files. 
1679 | 1680 | [versioneer] 1681 | #VCS = git 1682 | #style = pep440 1683 | #versionfile_source = 1684 | #versionfile_build = 1685 | #tag_prefix = 1686 | #parentdir_prefix = 1687 | 1688 | """ 1689 | 1690 | INIT_PY_SNIPPET = """ 1691 | from ._version import get_versions 1692 | __version__ = get_versions()['version'] 1693 | del get_versions 1694 | """ 1695 | 1696 | 1697 | def do_setup(): 1698 | """Main VCS-independent setup function for installing Versioneer.""" 1699 | root = get_root() 1700 | try: 1701 | cfg = get_config_from_root(root) 1702 | except (EnvironmentError, configparser.NoSectionError, 1703 | configparser.NoOptionError) as e: 1704 | if isinstance(e, (EnvironmentError, configparser.NoSectionError)): 1705 | print("Adding sample versioneer config to setup.cfg", 1706 | file=sys.stderr) 1707 | with open(os.path.join(root, "setup.cfg"), "a") as f: 1708 | f.write(SAMPLE_CONFIG) 1709 | print(CONFIG_ERROR, file=sys.stderr) 1710 | return 1 1711 | 1712 | print(" creating %s" % cfg.versionfile_source) 1713 | with open(cfg.versionfile_source, "w") as f: 1714 | LONG = LONG_VERSION_PY[cfg.VCS] 1715 | f.write(LONG % {"DOLLAR": "$", 1716 | "STYLE": cfg.style, 1717 | "TAG_PREFIX": cfg.tag_prefix, 1718 | "PARENTDIR_PREFIX": cfg.parentdir_prefix, 1719 | "VERSIONFILE_SOURCE": cfg.versionfile_source, 1720 | }) 1721 | 1722 | ipy = os.path.join(os.path.dirname(cfg.versionfile_source), 1723 | "__init__.py") 1724 | if os.path.exists(ipy): 1725 | try: 1726 | with open(ipy, "r") as f: 1727 | old = f.read() 1728 | except EnvironmentError: 1729 | old = "" 1730 | if INIT_PY_SNIPPET not in old: 1731 | print(" appending to %s" % ipy) 1732 | with open(ipy, "a") as f: 1733 | f.write(INIT_PY_SNIPPET) 1734 | else: 1735 | print(" %s unmodified" % ipy) 1736 | else: 1737 | print(" %s doesn't exist, ok" % ipy) 1738 | ipy = None 1739 | 1740 | # Make sure both the top-level "versioneer.py" and versionfile_source 1741 | # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so 1742 | # 
they'll be copied into source distributions. Pip won't be able to 1743 | # install the package without this. 1744 | manifest_in = os.path.join(root, "MANIFEST.in") 1745 | simple_includes = set() 1746 | try: 1747 | with open(manifest_in, "r") as f: 1748 | for line in f: 1749 | if line.startswith("include "): 1750 | for include in line.split()[1:]: 1751 | simple_includes.add(include) 1752 | except EnvironmentError: 1753 | pass 1754 | # That doesn't cover everything MANIFEST.in can do 1755 | # (http://docs.python.org/2/distutils/sourcedist.html#commands), so 1756 | # it might give some false negatives. Appending redundant 'include' 1757 | # lines is safe, though. 1758 | if "versioneer.py" not in simple_includes: 1759 | print(" appending 'versioneer.py' to MANIFEST.in") 1760 | with open(manifest_in, "a") as f: 1761 | f.write("include versioneer.py\n") 1762 | else: 1763 | print(" 'versioneer.py' already in MANIFEST.in") 1764 | if cfg.versionfile_source not in simple_includes: 1765 | print(" appending versionfile_source ('%s') to MANIFEST.in" % 1766 | cfg.versionfile_source) 1767 | with open(manifest_in, "a") as f: 1768 | f.write("include %s\n" % cfg.versionfile_source) 1769 | else: 1770 | print(" versionfile_source already in MANIFEST.in") 1771 | 1772 | # Make VCS-specific changes. For git, this means creating/changing 1773 | # .gitattributes to mark _version.py for export-subst keyword 1774 | # substitution. 
1775 | do_vcs_install(manifest_in, cfg.versionfile_source, ipy) 1776 | return 0 1777 | 1778 | 1779 | def scan_setup_py(): 1780 | """Validate the contents of setup.py against Versioneer's expectations.""" 1781 | found = set() 1782 | setters = False 1783 | errors = 0 1784 | with open("setup.py", "r") as f: 1785 | for line in f.readlines(): 1786 | if "import versioneer" in line: 1787 | found.add("import") 1788 | if "versioneer.get_cmdclass()" in line: 1789 | found.add("cmdclass") 1790 | if "versioneer.get_version()" in line: 1791 | found.add("get_version") 1792 | if "versioneer.VCS" in line: 1793 | setters = True 1794 | if "versioneer.versionfile_source" in line: 1795 | setters = True 1796 | if len(found) != 3: 1797 | print("") 1798 | print("Your setup.py appears to be missing some important items") 1799 | print("(but I might be wrong). Please make sure it has something") 1800 | print("roughly like the following:") 1801 | print("") 1802 | print(" import versioneer") 1803 | print(" setup( version=versioneer.get_version(),") 1804 | print(" cmdclass=versioneer.get_cmdclass(), ...)") 1805 | print("") 1806 | errors += 1 1807 | if setters: 1808 | print("You should remove lines like 'versioneer.VCS = ' and") 1809 | print("'versioneer.versionfile_source = ' . This configuration") 1810 | print("now lives in setup.cfg, and should be removed from setup.py") 1811 | print("") 1812 | errors += 1 1813 | return errors 1814 | 1815 | 1816 | if __name__ == "__main__": 1817 | cmd = sys.argv[1] 1818 | if cmd == "setup": 1819 | errors = do_setup() 1820 | errors += scan_setup_py() 1821 | if errors: 1822 | sys.exit(1) 1823 | --------------------------------------------------------------------------------