├── .anylint
├── .codacy.yml
├── .codebeatignore
├── .coveragerc
├── .gitattributes
├── .gitignore
├── .isort.cfg
├── .readthedocs.yml
├── .travis.yml
├── .whitesource
├── LICENSE
├── MANIFEST.in
├── README.rst
├── codecov.yml
├── docker
│   └── MongoDB
│       ├── docker-compose.yml
│       ├── mongo-express.env
│       ├── mongo-initdb.d
│       │   └── createUser.js
│       └── mongo.env
├── docs
│   ├── Makefile
│   ├── make.bat
│   ├── requirements.txt
│   └── source
│       ├── conf.py
│       ├── index.rst
│       ├── intro
│       │   └── installation.rst
│       ├── items.rst
│       ├── pipelines
│       │   ├── ItemPipeline.rst
│       │   └── MongoDB.rst
│       ├── settings.rst
│       └── signals.rst
├── mypy.ini
├── pylintrc
├── pyproject.toml
├── pytest.ini
├── renovate.json
├── requirements.txt
├── scrapy_pipelines
│   ├── __init__.py
│   ├── _version.py
│   ├── items.py
│   ├── pipelines
│   │   ├── __init__.py
│   │   └── mongo.py
│   ├── settings
│   │   ├── __init__.py
│   │   └── default_settings.py
│   └── signals.py
├── setup.cfg
├── setup.py
├── tests
│   ├── __init__.py
│   ├── requirements.txt
│   ├── test_pipelines_mongo.py
│   └── test_settings.py
├── tox.ini
└── versioneer.py
/.anylint:
--------------------------------------------------------------------------------
1 | {
2 | "ignore":[
3 | "scrapy_pipelines/_version.py",
4 | "versioneer.py"
5 | ]
6 | }
7 |
--------------------------------------------------------------------------------
/.codacy.yml:
--------------------------------------------------------------------------------
1 | exclude_paths:
2 | - '.github/**'
3 | - 'scrapy_pipelines/_version.py'
4 | - 'tests/**'
5 | - 'versioneer.py'
6 |
--------------------------------------------------------------------------------
/.codebeatignore:
--------------------------------------------------------------------------------
1 | scrapy_pipelines/_version.py
2 | versioneer.py
3 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |     scrapy_pipelines/_version.py
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | scrapy_pipelines/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | docker/MongoDB/mongo-db/
107 | docker/MongoDB/mongo-home/
108 |
109 | Pipfile.lock
110 |
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | skip=scrapy_pipelines/_version.py
3 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Build documentation in the docs/ directory with Sphinx
9 | sphinx:
10 |   configuration: docs/source/conf.py
11 |
12 | # Build documentation with MkDocs
13 | #mkdocs:
14 | # configuration: mkdocs.yml
15 |
16 | # Optionally build your docs in additional formats such as PDF and ePub
17 | formats: all
18 |
19 | # Optionally set the version of Python and requirements required to build your docs
20 | python:
21 |   version: 3.7
22 |   install:
23 |     - requirements: docs/requirements.txt
24 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | services:
4 | - mongodb
5 |
6 | sudo: false
7 |
8 | branches:
9 |   only:
10 |   - master
11 |   - "/^v\\d+\\.\\d+$/"
12 |   - "/^\\d\\.\\d+$/"
13 |   - "/^\\d\\.\\d+\\.\\d+(rc\\d+|\\.dev\\d+)?$/"
14 |
15 | matrix:
16 |   include:
17 |   - env: TOXENV=py36
18 |     python: 3.6
19 |   - dist: xenial
20 |     env: TOXENV=py37
21 |     python: 3.7
22 |   - env: TOXENV=docs
23 |     python: 3.6
24 |   - env: TOXENV=docs-coverage
25 |     python: 3.6
26 |   - env: TOXENV=docs-links
27 |     python: 3.6
28 |
29 | install:
30 | - pip install -U tox twine wheel codecov
31 |
32 | before_script:
33 | - mongo test_db --eval 'db.createUser({user:"test_username",pwd:"test_password",roles:["readWrite"]});'
34 |
35 | script: tox
36 |
37 | after_success:
38 | - codecov
39 |
40 | cache:
41 |   directories:
42 |   - "$HOME/.cache/pip"
43 |
44 | notifications:
45 | slack:
46 | secure: n6gYbtGfOf3AlRKvwSpAgH7t55oJn5FN8BBMiLUpzUz9lto1GBF2c4l2myLjrKK7HmHjJ3lqVIovKBWkx8DT4lygMb6/QvTOnpudQUNan9qk1O2p87wenlKAraE/UAeku+iBTbLdSqo2NoFWOtx7BKgPXDCZpbDJ69Bmj5w3q73ksgTYDkydQdyR0RsnL6Q0LoaFNOmgTphje8uk5u5fM+R2M3Yi3hJmGF69vi5qki8kRoBkah00N8VohNOZ2Sr78cxv09MxkNnDizOhAEGqmyWZQaa8GXTPZYtr8f7CS6KE2UYhgRULdGBDr1LT5LhpP1dY2FD8Wb138yYMQ3hclLvrFiJEpKc3WZ/wAvWnKD/dhfS/6W4soxf1biDiNZSp0ROdqV2vJsqqM4XxSZrL8TzZCu2hyIDPw9DlVMpFW4v88c1F6wa7Ug1MDiVHPgK0d044ccRGFg5KXnsS6TpajmLsNZmxxSZvy3n1dla3heaP1oEb0n+TWl+cctrfh7Rw4iNJF/i5DkSabn14IQ8b4NYd2xWIeQoEtqjyZPXHEGRbz7xJ44f0AUo00ptugRr5/BuKTyxKtwvsE6HEx+3w2HTJ8q6h6xc71Dyih67Ga6n9X7gcUY6UEqCdlAkvjCFIro4jqScTbDadLXvN7aiaev/9lmcfSkM2GBYuVhlyhlw=
47 |
48 | deploy:
49 | provider: pypi
50 | distributions: sdist bdist_wheel
51 | user: scrapedia
52 | password:
53 | secure: Cic+TcpBqYRKXz/GVXA3EaCIu0uy+OXu8Jqi0EdGLDBB+2iUvKEjpSvMT8z2LZH6xZMwI6YwZRSGFdgdZFGF/Y/FxL8FFePCi5jqre9iiwGMIoJLCzLzONHkiz0IBsKtU3uJDtKBU1/NG7Nl7R2kT2HK97vKzt8q6VJpNdPkZLNeT2rpa4Fd9czfaxZ8GEHZ/rxYAVN8eltOGkbD2lh5+cfGxU1EvPJLnMLXGCIefw/+uS5+bt5urGKwF68PoeUEEdu9CcaH5bfo8XsOWqPxBcpyrf7UAufzYVOdTdr2FrBnxj1Xl2sz3ENXw7eRwoh5EoC30QaC3O06NgIm3P38IFSzNxhIvtNrFxCMWI49Wttljx9IuTwscDa1Jg/JlPb1QOHycBtrRgqSHR5MDzBmWdJB4w68S6Igq5rlQT4f0urrrVdLVuJqfkt6I2A22KJOl7rCLXN6Yn2ida4pZk/QBVlHaCEy3YfVx9yIcUjCVcq0DVzETll94zWH0JVRMRv5jEJvoscYNlex/ra/dSoEff3lpqjUH6J94R14FkjrZU+kC+h9vZkqcK8MUZSaDxghiKxOm57ons0gro7KIDHx1KZulQp2QUBUDpSZD0EdOGf6CGGauecf3cMza1WORMJjjeCrkCZA1LsbKnngcSItOUL43+8va/AEPSB4XImQFzU=
54 | on:
55 | branch: master
56 | condition: "$TOXENV == py36"
57 | repo: scrapedia/scrapy-pipelines
58 | tags: true
59 |
--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
1 | {
2 | "generalSettings": {
3 | "shouldScanRepo": true
4 | },
5 | "checkRunSettings": {
6 | "vulnerableCheckRunConclusionLevel": "failure"
7 | }
8 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 | <one line to give the program's name and a brief idea of what it does.>
635 | Copyright (C) <year> <name of author>
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
647 | along with this program. If not, see <https://www.gnu.org/licenses/>.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | <program> Copyright (C) <year> <name of author>
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <https://www.gnu.org/licenses/>.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | <https://www.gnu.org/licenses/why-not-lgpl.html>.
675 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include versioneer.py
2 | include scrapy_pipelines/_version.py
3 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | Read more: `noffle/art-of-readme: Learn the art of writing quality READMEs.`_
2 |
3 | .. _`noffle/art-of-readme: Learn the art of writing quality READMEs.`: https://github.com/noffle/art-of-readme
4 |
5 | ================
6 | Scrapy-Pipelines
7 | ================
8 |
9 | Overview
10 | ========
11 |
12 | .. image:: https://bestpractices.coreinfrastructure.org/projects/2828/badge
13 | :alt: CII Best Practices
14 | :target: https://bestpractices.coreinfrastructure.org/projects/2828
15 |
16 | .. image:: https://mperlet.github.io/pybadge/badges/9.43.svg
17 | :alt: pylint Score
18 |
19 | .. image:: https://img.shields.io/travis/scrapedia/scrapy-pipelines/master.svg
20 | :target: http://travis-ci.org/scrapedia/scrapy-pipelines
21 | :alt: Travis branch
22 |
23 | .. image:: https://codecov.io/gh/scrapedia/scrapy-pipelines/branch/master/graph/badge.svg
24 | :target: https://codecov.io/gh/scrapedia/scrapy-pipelines
25 | :alt: Coverage Report
26 |
27 | .. image:: https://codebeat.co/badges/fabc61ba-6a20-4bd1-bf73-a2f091a9ad80
28 | :target: https://codebeat.co/projects/github-com-scrapedia-scrapy-pipelines-master
29 | :alt: codebeat badge
30 |
31 | .. image:: https://api.codacy.com/project/badge/Grade/aeda92e058434a9eb2e8b0512a02235f
32 | :target: https://www.codacy.com/app/grammy-jiang/scrapy-pipelines?utm_source=github.com&utm_medium=referral&utm_content=scrapedia/scrapy-pipelines&utm_campaign=Badge_Grade
33 |
34 | .. image:: https://pyup.io/repos/github/scrapedia/scrapy-pipelines/shield.svg
35 | :target: https://pyup.io/repos/github/scrapedia/scrapy-pipelines/
36 | :alt: Updates
37 |
38 | .. image:: https://snyk.io/test/github/scrapedia/scrapy-pipelines/badge.svg
39 | :target: https://snyk.io/test/github/scrapedia/scrapy-pipelines
40 | :alt: Known Vulnerabilities
41 | .. image:: https://img.shields.io/badge/code%20style-black-000000.svg
42 | :target: https://github.com/python/black
43 | :alt: Code style: black
44 |
45 | .. image:: https://img.shields.io/badge/License-GPLv3-blue.svg
46 | :target: https://www.gnu.org/licenses/gpl-3.0
47 | :alt: License: GPL v3
48 |
49 | Since Scrapy doesn't provide enough pipeline examples for different backends
50 | or databases, this repository provides several to demonstrate
51 | proper usage, including:
52 |
53 | * MongoDB
54 | * Redis (todo)
55 | * InfluxDB (todo)
56 | * LevelDB (todo)
57 |
58 | These pipelines also provide multiple ways to save or update items, and
59 | return the id created by the backend.
60 |
61 | Requirements
62 | =============
63 |
64 | .. image:: https://pyup.io/repos/github/scrapedia/scrapy-pipelines/python-3-shield.svg
65 | :target: https://pyup.io/repos/github/scrapedia/scrapy-pipelines/
66 | :alt: Python 3
67 |
68 | * Python 3.6+
69 | * Works on Linux, Windows, Mac OSX
70 |
71 | Installation
72 | ============
73 |
74 | .. image:: https://img.shields.io/pypi/v/scrapy-pipelines.svg
75 | :target: https://pypi.python.org/pypi/scrapy-pipelines
76 | :alt: PyPI
77 | .. image:: https://img.shields.io/pypi/pyversions/scrapy-pipelines.svg
78 | :target: https://pypi.python.org/pypi/scrapy-pipelines
79 | :alt: PyPI - Python Version
80 | .. image:: https://img.shields.io/pypi/wheel/scrapy-pipelines.svg
81 | :target: https://pypi.python.org/pypi/scrapy-pipelines
82 | :alt: PyPI - Wheel
83 |
84 | The quick way:
85 |
86 | pip install scrapy-pipelines
87 |
88 | For more details see the installation section in the documentation:
89 | https://scrapy-pipelines.readthedocs.io/en/latest/intro/installation.html
90 |
91 | Documentation
92 | =============
93 |
94 | Documentation is available online at
95 | https://scrapy-pipelines.readthedocs.io/en/latest/ and in the docs directory.
96 |
97 | Community (blog, twitter, mail list, IRC)
98 | =========================================
99 |
100 | *This section is kept the same as Scrapy's, with the intention of contributing back to Scrapy.*
101 |
102 | See https://scrapy.org/community/
103 |
104 | Contributing
105 | ============
106 |
107 | *This section is kept the same as Scrapy's to make it easier to merge this
108 | repo back into Scrapy.*
109 |
110 | See https://doc.scrapy.org/en/master/contributing.html
111 |
112 | Code of Conduct
113 | ---------------
114 |
115 | Please note that this project is released with a Contributor Code of Conduct
116 | (see https://github.com/scrapy/scrapy/blob/master/CODE_OF_CONDUCT.md).
117 |
118 | By participating in this project you agree to abide by its terms.
119 | Please report unacceptable behavior to opensource@scrapinghub.com.
120 |
121 |
122 | Companies using Scrapy
123 | ======================
124 |
125 | *This section is kept the same as Scrapy's, with the intention of contributing back to Scrapy.*
126 |
127 | See https://scrapy.org/companies/
128 |
129 | Commercial Support
130 | ==================
131 |
132 | *This section is kept the same as Scrapy's, with the intention of contributing back to Scrapy.*
133 |
134 | See https://scrapy.org/support/
135 |
136 | TODO
137 | ====
138 |
139 | * [X] Add indexes creation in open_spider()
140 | * [X] Add item_completed method
141 | * [X] Add signals for MongoDB document's id return
142 | * [ ] Add MongoDB document update
143 | * [ ] Add Percona Server for MongoDB docker support
144 | * [ ] Add Redis support
145 | * [ ] Add InfluxDB support
146 | * [ ] Add LevelDB support
147 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
coverage:
  notify:
    slack:
      default:
        # NOTE(review): this Slack incoming-webhook URL is committed in plain
        # text, so anyone who can read the repository can post to the channel.
        # Consider rotating it and storing an encrypted value instead, if the
        # Codecov setup in use supports that - TODO confirm.
        url: "https://hooks.slack.com/services/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"
6 |
--------------------------------------------------------------------------------
/docker/MongoDB/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3"
2 | services:
3 | # https://hub.docker.com/_/mongo/
4 | mongo:
5 | container_name: dc-mongo
6 | env_file:
7 | - mongo.env
8 | image: mongo:latest
9 | networks:
10 | - mongo
11 | ports:
12 | - 27017:27017
13 | restart: always
14 | tty: true
15 | volumes:
16 | - ./mongo-db:/data/db
17 | - ./mongo-initdb.d:/docker-entrypoint-initdb.d
18 | # https://hub.docker.com/_/mongo-express/
19 | mongo-express:
20 | container_name: dc-mongodb-express
21 | depends_on:
22 | - mongo
23 | env_file:
24 | - mongo-express.env
25 | image: mongo-express:latest
26 | links:
27 | - mongo
28 | networks:
29 | - mongo
30 | ports:
31 | - 8081:8081
32 | restart: always
33 | tty: true
34 |
35 | networks:
36 | mongo:
37 | driver: bridge
38 |
--------------------------------------------------------------------------------
/docker/MongoDB/mongo-express.env:
--------------------------------------------------------------------------------
1 | ME_CONFIG_MONGODB_ADMINUSERNAME=root
2 | ME_CONFIG_MONGODB_ADMINPASSWORD=password
3 | ME_CONFIG_MONGODB_PORT=27017
4 | ME_CONFIG_MONGODB_SERVER=mongo
5 |
--------------------------------------------------------------------------------
/docker/MongoDB/mongo-initdb.d/createUser.js:
--------------------------------------------------------------------------------
// Switch to the 'test_db' database, then create a read/write user in it.
// The two steps are separate statements: the original joined them with a
// comma, which only worked through the comma operator.
db = db.getSiblingDB('test_db');
db.createUser({
    user: "test_username",
    pwd: "test_password",
    roles: ["readWrite"]
});
7 |
--------------------------------------------------------------------------------
/docker/MongoDB/mongo.env:
--------------------------------------------------------------------------------
1 | MONGO_INITDB_ROOT_USERNAME=root
2 | MONGO_INITDB_ROOT_PASSWORD=password
3 | MONGO_INITDB_DATABASE=admin
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SOURCEDIR = source
8 | BUILDDIR = build
9 |
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 |
14 | .PHONY: help Makefile
15 |
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | pymongo==3.11.3
2 | scrapy==2.4.1
3 | sphinx==3.5.2
4 | txmongo==19.2.0
5 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # http://www.sphinx-doc.org/en/master/config
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import sys
14 | from pathlib import Path
15 |
16 | sys.path.insert(0, str(Path("/").joinpath(*Path(__file__).parts[:-3])))
17 |
18 |
19 | # -- Project information -----------------------------------------------------
20 |
21 | project = "Scrapy Pipelines"
22 | copyright = "2019, Scrapedia"
23 | author = "Scrapedia"
24 |
25 |
26 | # -- General configuration ---------------------------------------------------
27 |
28 | # Add any Sphinx extension module names here, as strings. They can be
29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
30 | # ones.
31 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.coverage"]
32 |
33 | # Add any paths that contain templates here, relative to this directory.
34 | templates_path = ["_templates"]
35 |
36 | # List of patterns, relative to source directory, that match files and
37 | # directories to ignore when looking for source files.
38 | # This pattern also affects html_static_path and html_extra_path.
39 | exclude_patterns = []
40 |
41 |
42 | # -- Options for HTML output -------------------------------------------------
43 |
44 | # The theme to use for HTML and HTML Help pages. See the documentation for
45 | # a list of builtin themes.
46 | #
47 | html_theme = "alabaster"
48 |
49 | # Add any paths that contain custom static files (such as style sheets) here,
50 | # relative to this directory. They are copied after the builtin static files,
51 | # so a file named "default.css" will overwrite the builtin "default.css".
52 | html_static_path = ["_static"]
53 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. _topics-index:
2 |
3 | Scrapy Pipelines |version| documentation
4 | ========================================
5 |
6 | .. toctree::
7 | :hidden:
8 | :maxdepth: 2
9 | :caption: Contents:
10 |
11 | intro/installation
12 | pipelines/ItemPipeline
13 | pipelines/MongoDB
14 |
15 | items
16 | settings
17 | signals
18 |
19 | :doc:`intro/installation`
20 | Installation
21 |
22 | :doc:`pipelines/ItemPipeline`
23 | The root class for all pipelines
24 |
25 | :doc:`pipelines/MongoDB`
26 | Save items into MongoDB
27 |
28 | :doc:`items`
29 | Items used in these pipelines
30 |
31 | :doc:`settings`
32 | Settings for these pipelines
33 |
34 | :doc:`signals`
35 | Signals used in these pipelines
36 |
37 | Indices and tables
38 | ==================
39 |
40 | * :ref:`genindex`
41 | * :ref:`modindex`
42 | * :ref:`search`
43 |
--------------------------------------------------------------------------------
/docs/source/intro/installation.rst:
--------------------------------------------------------------------------------
1 | .. _intro-installation:
2 |
3 | ============
4 | Installation
5 | ============
6 |
--------------------------------------------------------------------------------
/docs/source/items.rst:
--------------------------------------------------------------------------------
1 | .. _items:
2 |
3 | =====
4 | Items
5 | =====
6 |
7 | .. automodule:: scrapy_pipelines.items
8 | :members:
9 |
--------------------------------------------------------------------------------
/docs/source/pipelines/ItemPipeline.rst:
--------------------------------------------------------------------------------
1 | .. _pipelines-itempipeline:
2 |
3 | ============
4 | ItemPipeline
5 | ============
6 |
7 | .. autoclass:: scrapy_pipelines.pipelines.ItemPipeline
8 | :members:
9 |
--------------------------------------------------------------------------------
/docs/source/pipelines/MongoDB.rst:
--------------------------------------------------------------------------------
1 | .. _pipelines-MongoDB:
2 |
3 | ================
4 | Pipeline MongoDB
5 | ================
6 |
7 | .. autoclass:: scrapy_pipelines.pipelines.mongo.MongoPipeline
8 | :members:
--------------------------------------------------------------------------------
/docs/source/settings.rst:
--------------------------------------------------------------------------------
1 | .. _settings:
2 |
3 | ========
4 | Settings
5 | ========
6 |
7 | .. automodule:: scrapy_pipelines.settings
8 | :members:
9 |
--------------------------------------------------------------------------------
/docs/source/signals.rst:
--------------------------------------------------------------------------------
1 | .. _signals:
2 |
3 | =======
4 | Signals
5 | =======
6 |
7 | .. automodule:: scrapy_pipelines.signals
8 | :members:
9 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | ignore_missing_imports = True
--------------------------------------------------------------------------------
/pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 | ignore=_version.py
3 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.black]
2 | exclude = '''
3 | (
4 | /(
5 | \.eggs # exclude a few common directories in the
6 | | \.git # root of the project
7 | | \.hg
8 | | \.mypy_cache
9 | | \.tox
10 | | \.venv
11 | | _build
12 | | buck-out
13 | | build
14 | | dist
15 | )/
16 | | foo.py # also separately exclude a file named foo.py in
17 | # the root of the project
18 | | scrapy_pipelines/_version.py
19 | )
20 | '''
21 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts =
3 | --cov=scrapy_pipelines tests/
4 | --ignore=docker/
5 | --numprocesses=auto
6 |
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": [
3 | "config:base"
4 | ]
5 | }
6 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | txmongo==19.2.0
2 | scrapy==2.4.1
3 |
--------------------------------------------------------------------------------
/scrapy_pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The pipelines used to save items
3 | """
4 | from ._version import get_versions
5 |
6 | __version__ = get_versions()["version"]
7 | del get_versions
8 |
--------------------------------------------------------------------------------
/scrapy_pipelines/_version.py:
--------------------------------------------------------------------------------
1 |
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by GitHub's download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 |
8 | # This file is released into the public domain. Generated by
9 | # versioneer-0.18 (https://github.com/warner/python-versioneer)
10 |
11 | """Git implementation of _version.py."""
12 |
13 | import errno
14 | import os
15 | import re
16 | import subprocess
17 | import sys
18 |
19 |
def get_keywords():
    """Get the keywords needed to look up the version information.

    Returns a dict with the keys "refnames", "full" and "date", holding the
    three string values assigned below.
    """
    # These strings will be replaced by git during git-archive.
    # setup.py/versioneer.py will grep for the variable names, so they must
    # each be defined on a line of their own (git_get_keywords() matches
    # lines that start with "git_refnames =", "git_full =" and "git_date =").
    # _version.py will just call get_keywords().
    git_refnames = " (HEAD -> master)"
    git_full = "667b87c8ff490e87d95d03ca0aaa715b9ceda47d"
    git_date = "2021-03-10 10:20:01 +1100"
    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
    return keywords
31 |
32 |
class VersioneerConfig:
    """Container for Versioneer configuration parameters.

    The class declares no attributes itself; get_config() attaches VCS,
    style, tag_prefix, parentdir_prefix, versionfile_source and verbose to
    each instance it creates.
    """
35 |
36 |
def get_config():
    """Build the VersioneerConfig() object describing this project.

    Returns a fresh VersioneerConfig instance carrying the six settings
    listed below as attributes.
    """
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    values = (
        ("VCS", "git"),
        ("style", "pep440"),
        ("tag_prefix", ""),
        ("parentdir_prefix", ""),
        ("versionfile_source", "scrapy_pipelines/_version.py"),
        ("verbose", False),
    )
    cfg = VersioneerConfig()
    for attribute, value in values:
        setattr(cfg, attribute, value)
    return cfg
49 |
50 |
class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario.

    get_versions() catches it around each version-discovery strategy and
    moves on to the next one.
    """
53 |
54 |
LONG_VERSION_PY = {}
# Registry filled by register_vcs_handler: HANDLERS[vcs][method] -> callable.
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Return a decorator that records a function as a VCS handler.

    The decorated function is stored under HANDLERS[vcs][method] and is
    returned unchanged, so it stays callable under its own name.
    """
    def decorate(handler):
        """Store handler in HANDLERS[vcs][method]."""
        HANDLERS.setdefault(vcs, {})[method] = handler
        return handler
    return decorate
68 |
69 |
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Run the first launchable executable of ``commands`` with ``args``.

    Each entry of ``commands`` is tried in order; an entry that cannot be
    found (ENOENT) is skipped. Returns a ``(stdout, returncode)`` tuple:

    * ``(None, None)`` when no entry could be started,
    * ``(None, returncode)`` when the process exited with a non-zero status,
    * ``(stripped stdout, returncode)`` otherwise.

    When ``hide_stderr`` is true the child's stderr is captured instead of
    being inherited. ``verbose`` enables diagnostic printing.
    """
    assert isinstance(commands, list)
    stderr_target = subprocess.PIPE if hide_stderr else None
    process = None
    for executable in commands:
        argv = [executable] + args
        dispcmd = str(argv)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen(argv, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=stderr_target)
        except EnvironmentError as err:
            if err.errno == errno.ENOENT:
                # this spelling of the command does not exist; try the next
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(err)
            return None, None
        break
    else:
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None

    stdout = process.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    status = process.returncode
    if status != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, status
    return stdout, status
105 |
106 |
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of an enclosing directory.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. ``root`` itself and up to two of
    its ancestors are inspected; the first whose basename starts with
    ``parentdir_prefix`` supplies the version (the rest of the basename).

    Raises NotThisMethod when none of the inspected directories matches.
    """
    tried = []
    candidate = root

    for _ in range(3):
        name = os.path.basename(candidate)
        if name.startswith(parentdir_prefix):
            return {"version": name[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
130 |
131 |
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    Scans ``versionfile_abs`` for lines starting with ``git_refnames =``,
    ``git_full =`` or ``git_date =`` and returns the double-quoted value of
    each as a dict with the keys "refnames", "full" and "date". Keys whose
    line is missing or malformed are left out. If the file cannot be opened
    or read, whatever was collected so far (possibly nothing) is returned.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    markers = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    keywords = {}
    try:
        # 'with' guarantees the handle is closed even if reading fails
        # part-way through.
        with open(versionfile_abs, "r") as f:
            for line in f:
                stripped = line.strip()
                for prefix, key in markers:
                    if stripped.startswith(prefix):
                        mo = re.search(r'=\s*"(.*)"', line)
                        if mo:
                            keywords[key] = mo.group(1)
    except EnvironmentError:
        # best effort: an unreadable file simply yields fewer keywords
        pass
    return keywords
159 |
160 |
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Compute version information from expanded git-archive keywords.

    ``keywords`` is the dict produced by get_keywords()/git_get_keywords().
    Returns a version dict ("version", "full-revisionid", "dirty", "error",
    "date"). Raises NotThisMethod when ``keywords`` is empty or its refnames
    were never expanded by git-archive.
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")

    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")

    refs = {name.strip() for name in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    tag_marker = "tag: "
    tags = {name[len(tag_marker):]
            for name in refs if name.startswith(tag_marker)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {name for name in refs if re.search(r'\d', name)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))

    # sorting will prefer e.g. "2.0" over "2.0rc1"
    matching = [ref for ref in sorted(tags) if ref.startswith(tag_prefix)]
    if matching:
        version = matching[0][len(tag_prefix):]
        if verbose:
            print("picking %s" % version)
        return {"version": version,
                "full-revisionid": keywords["full"].strip(),
                "dirty": False, "error": None,
                "date": date}

    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
214 |
215 |
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    ``run_command`` is injectable and must behave like run_command(): return
    a ``(stdout, returncode)`` tuple whose stdout is None on failure.

    Returns a "pieces" dict with the keys "long", "short", "error" and
    "dirty"; on success it also carries "closest-tag", "distance" and "date".
    When the describe output cannot be parsed or the tag does not start with
    ``tag_prefix``, "error" is set and the dict is returned early.

    Raises NotThisMethod when ``root`` is not under git control or when any
    of the git commands needed to fill the dict fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    _, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                        hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        if count_out is None:
            # int(None) would escape as a TypeError that get_versions() does
            # not catch; signal "try another method" instead.
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                               cwd=root)
    if date_out is None:
        # same reasoning as above: avoid an AttributeError on None.strip()
        raise NotThisMethod("'git show' failed")
    date = date_out.strip()
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
306 |
307 |
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    ``pieces["closest-tag"]`` may be absent or explicitly None (that is what
    git_pieces_from_vcs() stores when no tag exists); both cases are treated
    as a tag without a "+", so "+" is returned.
    """
    # "or" also covers a key that is present but None, which
    # dict.get(key, "") would hand back unchanged and break the "in" test.
    if "+" in (pieces.get("closest-tag") or ""):
        return "."
    return "+"
313 |
314 |
def render_pep440(pieces):
    """Render the pieces as TAG[+DISTANCE.gHEX[.dirty]].

    The part after the tag is a post-release "local version identifier".
    A tagged build that has been dirtied renders as TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
    elif pieces["distance"] or pieces["dirty"]:
        separator = plus_or_dot(pieces)
        rendered = tag + separator + "%d.g%s" % (pieces["distance"],
                                                 pieces["short"])
    else:
        # exactly on a clean tag: nothing to append
        return tag

    if pieces["dirty"]:
        rendered += ".dirty"
    return rendered
338 |
339 |
def render_pep440_pre(pieces):
    """Render the pieces as TAG[.post.devDISTANCE]; dirtiness is ignored.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    distance = pieces["distance"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % distance
    if distance:
        return tag + ".post.dev%d" % distance
    return tag
354 |
355 |
def render_pep440_post(pieces):
    """Render the pieces as TAG[.postDISTANCE[.dev0]+gHEX].

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag and not (pieces["distance"] or pieces["dirty"]):
        # exactly on a clean tag: the tag is the whole version
        return tag

    # exception #1: without a tag the base version is "0"
    parts = [tag or "0", ".post%d" % pieces["distance"]]
    if pieces["dirty"]:
        parts.append(".dev0")
    parts.append(plus_or_dot(pieces) if tag else "+")
    parts.append("g%s" % pieces["short"])
    return "".join(parts)
381 |
382 |
def render_pep440_old(pieces):
    """Render the pieces as TAG[.postDISTANCE[.dev0]].

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag and not (pieces["distance"] or pieces["dirty"]):
        # exactly on a clean tag: the tag is the whole version
        return tag

    # exception #1: without a tag the base version is "0"
    rendered = (tag or "0") + ".post%d" % pieces["distance"]
    if pieces["dirty"]:
        rendered += ".dev0"
    return rendered
403 |
404 |
def render_git_describe(pieces):
    """Render the pieces as TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        rendered = pieces["short"]
    elif pieces["distance"]:
        rendered = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        rendered = tag

    suffix = "-dirty" if pieces["dirty"] else ""
    return rendered + suffix
423 |
424 |
def render_git_describe_long(pieces):
    """Render the pieces as TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always --long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]

    suffix = "-dirty" if pieces["dirty"] else ""
    return rendered + suffix
443 |
444 |
def render(pieces, style):
    """Render the given version pieces into the requested style.

    When ``pieces["error"]`` is set, a dict with version "unknown" and that
    error is returned without looking at ``style``. An empty style or
    "default" means "pep440". Raises ValueError for any other unknown style.
    """
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = (
        ("pep440", render_pep440),
        ("pep440-pre", render_pep440_pre),
        ("pep440-post", render_pep440_post),
        ("pep440-old", render_pep440_old),
        ("git-describe", render_git_describe),
        ("git-describe-long", render_git_describe_long),
    )
    for name, renderer in renderers:
        if style == name:
            rendered = renderer(pieces)
            break
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
475 |
476 |
def get_versions():
    """Get version information or return default if unable to do so.

    Strategies are tried in order: expanded git-archive keywords, 'git
    describe' in the source tree, then the parent directory name (only when a
    parentdir_prefix is configured). A strategy that raises NotThisMethod is
    skipped. If all fail, a dict with version "0+unknown" is returned.
    """
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    def unknown(error):
        """Build the fallback result carrying the given error message."""
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": error,
                "date": None}

    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return unknown("unable to find root of source tree")

    try:
        return render(git_pieces_from_vcs(cfg.tag_prefix, root, verbose),
                      cfg.style)
    except NotThisMethod:
        pass

    if cfg.parentdir_prefix:
        try:
            return versions_from_parentdir(cfg.parentdir_prefix, root,
                                           verbose)
        except NotThisMethod:
            pass

    return unknown("unable to compute version")
521 |
--------------------------------------------------------------------------------
/scrapy_pipelines/items.py:
--------------------------------------------------------------------------------
1 | """
2 | A customized item for MongoDB
3 | """
4 | from scrapy.item import Field, Item
5 |
6 |
class BSONItem(Item):
    """
    An item that declares an ``_id`` field.

    Pymongo creates `_id` automatically in the object after inserting, so
    the field is declared here to let the item hold that value.
    """

    # Field for the MongoDB document identifier.
    _id = Field()
13 |
--------------------------------------------------------------------------------
/scrapy_pipelines/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Item pipeline
3 |
4 | See documentation in docs/source/pipelines/ItemPipeline.rst
5 | """
6 | import logging
7 | from abc import ABC, abstractmethod
8 |
9 | from scrapy.crawler import Crawler
10 | from scrapy.item import Item
11 | from scrapy.settings import SETTINGS_PRIORITIES, Settings
12 | from scrapy.spiders import Spider
13 |
14 | from scrapy_pipelines.settings import default_settings, unfreeze_settings
15 |
16 | LOGGER = logging.getLogger(__name__)
17 |
18 |
class ItemPipeline(ABC):
    """
    Abstract base class for the item pipelines of this package.

    Concrete pipelines must implement ``from_settings``, ``open_spider``,
    ``close_spider`` and ``process_item``.
    """

    def __init__(self, settings: Settings = None):
        """
        Store the settings; the crawler is attached later by ``from_crawler``.

        :param settings: the settings the pipeline reads its options from
        :type settings: Settings
        """
        self.settings = settings
        self.crawler: Crawler = None

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        """
        Build the pipeline for a crawler.

        The package default settings are merged into ``crawler.settings`` at
        "default" priority (the settings are temporarily unfrozen for that),
        the pipeline is created through ``from_settings`` and the crawler is
        attached to it.

        :param crawler: the crawler the pipeline is created for
        :type crawler: Crawler
        :return: the pipeline instance, with ``crawler`` set
        :rtype: ItemPipeline
        """
        with unfreeze_settings(crawler.settings) as settings:
            settings.setmodule(
                module=default_settings, priority=SETTINGS_PRIORITIES["default"]
            )
        try:
            pipe = cls.from_settings(crawler.settings)
        except AttributeError:
            # The no-argument fallback is kept for backward compatibility,
            # but the error is logged: an AttributeError raised *inside*
            # from_settings would otherwise disappear without a trace.
            LOGGER.debug(
                "%s.from_settings raised AttributeError; falling back to %s()",
                cls.__name__,
                cls.__name__,
                exc_info=True,
            )
            pipe = cls()
        pipe.crawler = crawler
        return pipe

    @classmethod
    @abstractmethod
    def from_settings(cls, settings: Settings):
        """
        Build the pipeline from settings.

        Abstract: subclasses must override it. This default implementation
        passes the settings straight to the constructor.

        :param settings: the settings to build the pipeline from
        :type settings: Settings
        :return: the pipeline instance
        :rtype: ItemPipeline
        """
        return cls(settings=settings)

    @abstractmethod
    def open_spider(self, spider: Spider):
        """
        Abstract hook taking the spider; subclasses must implement it.

        :param spider: the spider the pipeline is used with
        :type spider: Spider
        :return:
        :rtype:
        """

    @abstractmethod
    def close_spider(self, spider: Spider):
        """
        Abstract hook taking the spider; subclasses must implement it.

        :param spider: the spider the pipeline is used with
        :type spider: Spider
        :return:
        :rtype:
        """

    @abstractmethod
    def process_item(self, item: Item, spider: Spider) -> Item:
        """
        Abstract hook handling one item; subclasses must implement it.

        :param item: the item to process
        :type item: Item
        :param spider: the spider the item belongs to
        :type spider: Spider
        :return: the processed item
        :rtype: Item
        """
96 |
--------------------------------------------------------------------------------
/scrapy_pipelines/pipelines/mongo.py:
--------------------------------------------------------------------------------
1 | """
2 | MongoDB Async Item Pipeline with txmongo
3 | """
4 | import inspect
5 | import logging
6 | from typing import Callable, Dict, Tuple
7 |
8 | from pymongo.errors import OperationFailure
9 | from pymongo.results import InsertOneResult
10 | from scrapy.crawler import Crawler
11 | from scrapy.item import Item
12 | from scrapy.settings import Settings
13 | from scrapy.spiders import Spider
14 | from twisted.internet.defer import inlineCallbacks
15 | from txmongo.collection import Collection
16 | from txmongo.connection import ConnectionPool
17 | from txmongo.database import Database
18 | from txmongo.filter import sort as txsort
19 |
20 | from scrapy_pipelines.pipelines import ItemPipeline
21 | from scrapy_pipelines.signals import item_id
22 |
23 | LOGGER = logging.getLogger(__name__)
24 |
25 |
def get_args(func: Callable) -> Tuple[str, ...]:
    """
    Return the parameter names of a callable, in declaration order.

    :param func: the callable to inspect
    :type func: callable
    :return: the names found in the callable's signature
    :rtype: tuple
    """
    return tuple(inspect.signature(func).parameters)
36 |
37 |
class MongoPipeline(ItemPipeline):
    """
    A pipeline that saves items into MongoDB asynchronously with txmongo.

    Connection, database, authentication and collection arguments are taken
    from the ``PIPELINE_MONGO_*`` settings.
    """

    def __init__(self, uri: str, settings: Settings):
        """
        Store the URI and settings; nothing is connected yet.

        :param uri: the MongoDB URI, used in the log messages
        :type uri: str
        :param settings: the settings the ``PIPELINE_MONGO_*`` keys are read from
        :type settings: Settings
        """
        super().__init__(settings=settings)

        self.uri: str = uri

        # All three are populated by open_spider.
        self.mongo: ConnectionPool = None
        self.database: Database = None
        self.collection: Collection = None

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        """
        Build the pipeline and subscribe ``process_item_id`` to the
        ``item_id`` signal of the crawler.

        :param crawler: the crawler the pipeline is created for
        :type crawler: Crawler
        :return: the pipeline instance
        :rtype: MongoPipeline
        """
        pipe = super().from_crawler(crawler=crawler)
        crawler.signals.connect(receiver=pipe.process_item_id, signal=item_id)
        return pipe

    @classmethod
    def from_settings(cls, settings: Settings):
        """
        Build the pipeline from settings, reading ``PIPELINE_MONGO_URI``.

        :param settings: the settings to build the pipeline from
        :type settings: Settings
        :return: the pipeline instance
        :rtype: MongoPipeline
        """
        uri = settings["PIPELINE_MONGO_URI"]
        return cls(uri=uri, settings=settings)

    def _get_args_from_settings(self, func: Callable) -> Dict[str, str]:
        """
        Collect keyword arguments for ``func`` from the settings.

        For every parameter ``arg`` of ``func`` the setting
        ``PIPELINE_MONGO_<ARG>`` (upper-cased name) is looked up; parameters
        without such a setting are left out.

        :param func: the callable whose parameters are looked up
        :type func: Callable
        :return: parameter name -> value found in the settings
        :rtype: Dict[str, str]
        """
        candidates = (
            (arg, "PIPELINE_MONGO_{arg}".format(arg=arg.upper()))
            for arg in get_args(func)
        )
        return {
            arg: self.settings[key] for arg, key in candidates if key in self.settings
        }

    def _get_callable(self, callable_: Callable, **kwargs):
        """
        Call ``callable_`` with arguments taken from the settings.

        Explicit ``kwargs`` override the values found in the settings.

        :param callable_: the callable to invoke
        :param kwargs: explicit keyword arguments
        :return: whatever ``callable_`` returns
        :rtype:
        """
        args = self._get_args_from_settings(func=callable_)
        args.update(kwargs)
        return callable_(**args)

    @inlineCallbacks
    def open_spider(self, spider: Spider):
        """
        Connect to MongoDB and prepare the collection.

        Creates the connection pool and the database, authenticates when both
        ``PIPELINE_MONGO_USERNAME`` and ``PIPELINE_MONGO_PASSWORD`` are set,
        then runs ``listCollections`` as an access check. If that check fails
        with ``OperationFailure`` the error is logged and the spider is
        closed; otherwise the collection is created and the configured
        indexes are built.

        :param spider: the spider being opened
        :type spider: Spider
        :return:
        :rtype:
        """
        self.mongo = yield self._get_callable(ConnectionPool)
        self.database = yield self._get_callable(
            Database,
            factory=self.mongo,
            database_name=self.settings.get("PIPELINE_MONGO_DATABASE"),
        )
        if all(
            (
                self.settings.get("PIPELINE_MONGO_USERNAME"),
                self.settings.get("PIPELINE_MONGO_PASSWORD"),
            )
        ):
            yield self._get_callable(
                self.database.authenticate,
                name=self.settings.get("PIPELINE_MONGO_USERNAME"),
            )
        try:
            yield self.database.command("listCollections")
        except OperationFailure as err:
            LOGGER.error(str(err))
            self.crawler.engine.close_spider(spider=spider, reason=str(err))
        else:
            self.collection = yield self._get_callable(
                Collection,
                database=self.database,
                name=self.settings.get("PIPELINE_MONGO_COLLECTION"),
            )
            yield self.create_indexes(spider=spider)

            # Only report success when the access check passed; on failure
            # the error above has been logged and the spider is closing.
            LOGGER.info('MongoPipeline is opened with "%s"', self.uri)

    @inlineCallbacks
    def close_spider(self, spider: Spider):
        """
        Disconnect from MongoDB.

        :param spider: the spider being closed
        :type spider: Spider
        :return:
        :rtype:
        """
        # self.mongo is still None if open_spider never created the pool.
        if self.mongo is not None:
            yield self.mongo.disconnect()

        LOGGER.info('MongoPipeline is closed with "%s"', self.uri)

    @inlineCallbacks
    def create_indexes(self, spider: Spider):
        """
        Create the indexes listed in ``PIPELINE_MONGO_INDEXES``.

        Each entry is ``(field, order)`` or ``(field, order, kwargs)``:
        ``order(field)`` is passed to txmongo's ``sort`` filter and the
        optional ``kwargs`` dict is forwarded to ``create_index``.

        :param spider: the spider the pipeline is used with (unused here)
        :type spider: Spider
        :return:
        :rtype:
        """
        indexes = self.settings.get("PIPELINE_MONGO_INDEXES", [])
        for field, _order, *args in indexes:
            sort_fields = txsort(_order(field))
            kwargs = args[0] if args else {}
            _ = yield self.collection.create_index(sort_fields, **kwargs)

    @inlineCallbacks
    def process_item(self, item: Item, spider: Spider) -> Item:
        """
        Insert a copy of the item (``dict(item)``) into the collection.

        The insert result is handed to ``item_completed`` and whatever that
        hook returns is the result. Because of ``inlineCallbacks`` the caller
        receives it through a Deferred.

        :param item: the item to store
        :type item: Item
        :param spider: the spider the item belongs to
        :type spider: Spider
        :return: the item returned by ``item_completed``
        :rtype: Item
        """
        result = yield self.collection.insert_one(document=dict(item))

        _item = self.item_completed(result, item, spider)

        return _item

    def item_completed(
        self, result: InsertOneResult, item: Item, spider: Spider
    ) -> Item:
        """
        Hook called by ``process_item`` after the insert.

        This default implementation ignores ``result`` and returns the item
        unchanged.

        :param result: the value produced by ``insert_one`` in ``process_item``
        :type result: InsertOneResult
        :param item: the item that was stored
        :type item: Item
        :param spider: the spider the item belongs to
        :type spider: Spider
        :return: the item
        :rtype: Item
        """
        return item

    @inlineCallbacks
    def process_item_id(
        self, signal: object, sender: Crawler, item: Item, spider: Spider
    ) -> InsertOneResult:
        """
        Receiver of the ``item_id`` signal: insert the item, return the result.

        Because of ``inlineCallbacks`` the caller receives the result through
        a Deferred.

        :param signal: the signal object
        :type signal: object
        :param sender: the sender of the signal
        :type sender: Crawler
        :param item: the item to store
        :type item: Item
        :param spider: the spider the item belongs to
        :type spider: Spider
        :return: the result of ``insert_one``
        :rtype: InsertOneResult
        """
        result = yield self.collection.insert_one(document=dict(item))

        return result
233 |
--------------------------------------------------------------------------------
/scrapy_pipelines/settings/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The utilities used in settings module
3 | """
4 | from contextlib import contextmanager
5 | from typing import Generator
6 |
7 | from scrapy.settings import Settings
8 |
9 |
@contextmanager
def unfreeze_settings(settings: Settings) -> Generator[Settings, None, None]:
    """
    Temporarily make the settings mutable.

    ``settings.frozen`` is set to ``False`` for the duration of the ``with``
    block and restored to its previous value afterwards, even if the block
    raises.

    :param settings: the settings to unfreeze
    :type settings: Settings
    :return: the same settings object, unfrozen
    :rtype: Generator[Settings, None, None]
    """
    was_frozen = settings.frozen
    settings.frozen = False
    try:
        yield settings
    finally:
        settings.frozen = was_frozen
24 |
--------------------------------------------------------------------------------
/scrapy_pipelines/settings/default_settings.py:
--------------------------------------------------------------------------------
1 | """
2 | This module contains the default values for all settings used by this item
3 | pipeline.
4 |
5 | For more information about these settings you can read the settings
6 | documentation in docs/source/settings.rst
7 |
8 | Developers, if you add a setting here remember to:
9 |
10 | * add it in alphabetical order
11 | * group similar settings without leaving blank lines
12 | * add its documentation to the available settings documentation
13 | (docs/source/settings.rst)
14 |
15 | """
16 | PIPELINE_MONGO_URI = "mongodb://127.0.0.1:27017"
17 | # PIPELINE_MONGO_POOL_SIZE = 1
18 | # PIPELINE_MONGO_SSL_CONTEXT_FACTORY = None
19 | # PIPELINE_MONGO_PING_INTERVAL = 10
20 | # PIPELINE_MONGO_PING_TIMEOUT = 10
21 |
22 | PIPELINE_MONGO_DATABASE = "scrapy_project_database"
23 | # PIPELINE_MONGO_WRITE_CONCERN = None
24 | # PIPELINE_MONGO_CODEC_OPTION = None
25 |
26 | PIPELINE_MONGO_USERNAME = "USERNAME"
27 | PIPELINE_MONGO_PASSWORD = "PASSWORD"
28 | # PIPELINE_MONGO_MECHANISM = "DEFAULT"
29 |
30 | PIPELINE_MONGO_COLLECTION = "scrapy_project_collection"
31 |
32 | # PIPELINE_MONGO_OPTIONS_ = "OPTIONS_"
33 |
34 | # PIPELINE_MONGO_INDEXES = "INDEXES"
35 |
36 | # PIPELINE_MONGO_PROCESS_ITEM = "PROCESS_ITEM"
37 |
38 | # from txmongo.filter import ASCENDING, DESCENDING
39 | #
40 | # PIPELINE_MONGO_INDEXES = [
41 | # ("key_asc", ASCENDING, {"name": "index_key_asc"}),
42 | # ("key_des", DESCENDING, {"name": "index_key_des"}),
43 | # ("key_unique", DESCENDING, {"name": "index_key_unique", "unique": True}),
44 | # ]
45 |
--------------------------------------------------------------------------------
/scrapy_pipelines/signals.py:
--------------------------------------------------------------------------------
1 | """
2 | Signals for the pipelines
3 | """
4 | item_id = object()
5 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [versioneer]
2 | VCS = git
3 | style = pep440
4 | versionfile_source = scrapy_pipelines/_version.py
5 | tag_prefix =
6 | parentdir_prefix =
7 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """
2 | Python package configuration
3 | """
4 | from setuptools import find_packages, setup
5 |
6 | import versioneer
7 |
extras_require = {}

# Read the README as UTF-8 explicitly so the build does not depend on the
# platform's default encoding.
with open("README.rst", "r", encoding="utf-8") as fh:
    LONG_DESCRIPTION = fh.read()

setup(
    name="Scrapy-Pipelines",
    # Version and build commands are supplied by versioneer.
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    url="https://github.com/scrapedia/scrapy-pipelines",
    description="A collection of scrapy item pipelines",
    long_description=LONG_DESCRIPTION,
    author="Scrapedia",
    author_email="Scrapedia@outlook.com",
    maintainer="Scrapedia",
    maintainer_email="Scrapedia@outlook.com",
    license="GPLv3",
    packages=find_packages(exclude=("tests", "tests.*")),
    include_package_data=True,
    zip_safe=False,
    classifiers=[
        "Framework :: Scrapy",
        "Development Status :: 2 - Pre-Alpha",
        "Environment :: Plugins",
        "Intended Audience :: Developers",
        "Operating System :: OS Independent",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Topic :: Internet :: WWW/HTTP",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
    install_requires=["scrapy", "txmongo"],
    extras_require=extras_require,
)
44 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scrapedia/scrapy-pipelines/667b87c8ff490e87d95d03ca0aaa715b9ceda47d/tests/__init__.py
--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest==5.4.1
2 | pytest-cov==2.8.1
3 | pytest-xdist==1.32.0
--------------------------------------------------------------------------------
/tests/test_pipelines_mongo.py:
--------------------------------------------------------------------------------
1 | """
2 | Test MongoPipeline
3 | """
4 | from bson.son import SON
5 | from pymongo.results import InsertOneResult
6 | from scrapy import Spider
7 | from scrapy.item import Field, Item
8 | from scrapy.settings import Settings
9 | from twisted.internet.defer import inlineCallbacks
10 | from twisted.trial.unittest import TestCase
11 | from txmongo.filter import ASCENDING, DESCENDING
12 |
13 | from scrapy_pipelines.pipelines.mongo import MongoPipeline, get_args
14 | from scrapy_pipelines.settings import default_settings
15 |
16 |
17 | class TempItem(Item):
18 | """
19 | A item class just for test purpose
20 | """
21 |
22 | a = Field()
23 | b = Field()
24 |
25 |
26 | class TestGetArgs(TestCase):
27 | """
28 | Test the functions in MongoPipeline
29 | """
30 |
31 | def test_get_args(self):
32 | """
33 |
34 | :return:
35 | """
36 |
37 | def test_func(arg_1, arg_2, arg_3):
38 | return arg_1, arg_2, arg_3
39 |
40 | args = get_args(test_func)
41 |
42 | self.assertSequenceEqual(args, ["arg_1", "arg_2", "arg_3"])
43 |
44 |
45 | class TestMongoPipeline(TestCase):
46 | """
47 | Test MongoPipeline
48 | """
49 |
50 | maxDiff = None
51 | mongo_settings = {
52 | "PIPELINE_MONGO_URI": "mongodb://127.0.0.1:27017",
53 | "PIPELINE_MONGO_DATABASE": "test_db",
54 | "PIPELINE_MONGO_USERNAME": "test_username",
55 | "PIPELINE_MONGO_PASSWORD": "test_password",
56 | "PIPELINE_MONGO_COLLECTION": "test_coll",
57 | "PIPELINE_MONGO_INDEXES": [
58 | ("test", ASCENDING),
59 | ("test_asc", ASCENDING, {"name": "index_test_asc"}),
60 | ("test_des", DESCENDING, {"name": "index_test_des"}),
61 | (
62 | "test_unique",
63 | DESCENDING,
64 | {
65 | "name": "index_test_unique",
66 | "unique": True,
67 | "partialFilterExpression": {"test_unique": {"$exists": True}},
68 | },
69 | ),
70 | ],
71 | }
72 |
73 | @inlineCallbacks
74 | def setUp(self) -> None:
75 | self.settings = Settings()
76 | self.settings.setmodule(module=default_settings)
77 | self.settings.setdict(self.mongo_settings)
78 | self.spider = Spider(name="TestMongoPipeline")
79 | self.pipe = MongoPipeline.from_settings(settings=self.settings)
80 | yield self.pipe.open_spider(spider=None)
81 |
82 | @inlineCallbacks
83 | def tearDown(self) -> None:
84 | yield self.pipe.close_spider(spider=None)
85 |
86 | @inlineCallbacks
87 | def test_create_indexes(self) -> None:
88 | """
89 |
90 | :return:
91 | """
92 | _index_info = {
93 | "_id_": {
94 | "key": SON([("_id", 1)]),
95 | "name": "_id_",
96 | "ns": "test_db.test_coll",
97 | "v": 2,
98 | },
99 | "test_1": {
100 | "key": SON([("test", 1)]),
101 | "name": "test_1",
102 | "ns": "test_db.test_coll",
103 | "v": 2,
104 | },
105 | "index_test_asc": {
106 | "key": SON([("test_asc", 1)]),
107 | "name": "index_test_asc",
108 | "ns": "test_db.test_coll",
109 | "v": 2,
110 | },
111 | "index_test_des": {
112 | "key": SON([("test_des", -1)]),
113 | "name": "index_test_des",
114 | "ns": "test_db.test_coll",
115 | "v": 2,
116 | },
117 | "index_test_unique": {
118 | "key": SON([("test_unique", -1)]),
119 | "name": "index_test_unique",
120 | "ns": "test_db.test_coll",
121 | "partialFilterExpression": {"test_unique": {"$exists": True}},
122 | "unique": True,
123 | "v": 2,
124 | },
125 | }
126 | index_info = yield self.pipe.collection.index_information()
127 | self.assertDictEqual(index_info, _index_info)
128 |
129 | @inlineCallbacks
130 | def test_process_item(self):
131 | """
132 |
133 | :return:
134 | """
135 | item = TempItem({"a": 0, "b": 1})
136 | result = yield self.pipe.process_item(item=item, spider=self.spider)
137 |
138 | self.assertDictEqual(dict(result), dict(item))
139 |
140 | def test_item_completed(self):
141 | """
142 |
143 | :return:
144 | """
145 | _item = TempItem({"a": 2, "b": 3})
146 | item = self.pipe.item_completed(None, _item, None)
147 | self.assertDictEqual(dict(_item), dict(item))
148 |
149 | @inlineCallbacks
150 | def test_process_item_id(self):
151 | """
152 |
153 | :return:
154 | """
155 | item = TempItem({"a": 4, "b": 5})
156 | result = yield self.pipe.process_item_id(
157 | signal=object(), sender=None, item=item, spider=self.spider
158 | )
159 |
160 | self.assertIsInstance(result, InsertOneResult)
161 |
--------------------------------------------------------------------------------
/tests/test_settings.py:
--------------------------------------------------------------------------------
1 | """
2 | test the functions in settings
3 | """
4 | from unittest import TestCase
5 |
6 | from scrapy.settings import Settings
7 |
8 | from scrapy_pipelines.settings import unfreeze_settings
9 |
10 |
11 | class TestSettings(TestCase):
12 | """
13 | Test the functions in default settings
14 | """
15 |
16 | def setUp(self) -> None:
17 | self.settings = Settings()
18 | self.settings.freeze()
19 |
20 | def test_unfreeze_settings_succeed(self):
21 | """
22 |
23 | :return:
24 | """
25 | self.assertEqual(self.settings.frozen, True)
26 | with unfreeze_settings(self.settings):
27 | self.assertEqual(self.settings.frozen, False)
28 | self.assertEqual(self.settings.frozen, True)
29 |
30 | def test_unfreeze_settings_failed(self):
31 | """
32 |
33 | :return:
34 | """
35 | with self.assertRaises(Exception):
36 | with unfreeze_settings(self.settings):
37 | raise Exception
38 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py36,py37
3 |
4 | [testenv]
5 | commands =
6 | pytest
7 | deps =
8 | -rrequirements.txt
9 | -rtests/requirements.txt
10 | passenv =
11 | PYTHONPATH
12 |
13 | [docs]
14 | changedir = docs
15 | deps =
16 | -rdocs/requirements.txt
17 |
18 | [testenv:docs]
19 | changedir = {[docs]changedir}
20 | commands =
21 | sphinx-build -W -b html source {envtmpdir}/html
22 | deps = {[docs]deps}
23 |
24 | [testenv:docs-coverage]
25 | changedir = {[docs]changedir}
26 | commands =
27 | sphinx-build -b coverage source {envtmpdir}/coverage
28 | deps = {[docs]deps}
29 |
30 | [testenv:docs-links]
31 | changedir = {[docs]changedir}
32 | commands =
33 | sphinx-build -W -b linkcheck source {envtmpdir}/linkcheck
34 | deps = {[docs]deps}
35 |
--------------------------------------------------------------------------------
/versioneer.py:
--------------------------------------------------------------------------------
1 |
2 | # Version: 0.18
3 |
4 | """The Versioneer - like a rocketeer, but for versions.
5 |
6 | The Versioneer
7 | ==============
8 |
9 | * like a rocketeer, but for versions!
10 | * https://github.com/warner/python-versioneer
11 | * Brian Warner
12 | * License: Public Domain
13 | * Compatible With: python2.6, 2.7, 3.2, 3.3, 3.4, 3.5, 3.6, and pypy
14 | * [![Latest Version]
15 | (https://pypip.in/version/versioneer/badge.svg?style=flat)
16 | ](https://pypi.python.org/pypi/versioneer/)
17 | * [![Build Status]
18 | (https://travis-ci.org/warner/python-versioneer.png?branch=master)
19 | ](https://travis-ci.org/warner/python-versioneer)
20 |
21 | This is a tool for managing a recorded version number in distutils-based
22 | python projects. The goal is to remove the tedious and error-prone "update
23 | the embedded version string" step from your release process. Making a new
24 | release should be as easy as recording a new tag in your version-control
25 | system, and maybe making new tarballs.
26 |
27 |
28 | ## Quick Install
29 |
30 | * `pip install versioneer` to somewhere in your $PATH
31 | * add a `[versioneer]` section to your setup.cfg (see below)
32 | * run `versioneer install` in your source tree, commit the results
33 |
34 | ## Version Identifiers
35 |
36 | Source trees come from a variety of places:
37 |
38 | * a version-control system checkout (mostly used by developers)
39 | * a nightly tarball, produced by build automation
40 | * a snapshot tarball, produced by a web-based VCS browser, like github's
41 | "tarball from tag" feature
42 | * a release tarball, produced by "setup.py sdist", distributed through PyPI
43 |
44 | Within each source tree, the version identifier (either a string or a number,
45 | this tool is format-agnostic) can come from a variety of places:
46 |
47 | * ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows
48 | about recent "tags" and an absolute revision-id
49 | * the name of the directory into which the tarball was unpacked
50 | * an expanded VCS keyword ($Id$, etc)
51 | * a `_version.py` created by some earlier build step
52 |
53 | For released software, the version identifier is closely related to a VCS
54 | tag. Some projects use tag names that include more than just the version
55 | string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool
56 | needs to strip the tag prefix to extract the version identifier. For
57 | unreleased software (between tags), the version identifier should provide
58 | enough information to help developers recreate the same tree, while also
59 | giving them an idea of roughly how old the tree is (after version 1.2, before
60 | version 1.3). Many VCS systems can report a description that captures this,
61 | for example `git describe --tags --dirty --always` reports things like
62 | "0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the
63 | 0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has
64 | uncommitted changes).
65 |
66 | The version identifier is used for multiple purposes:
67 |
68 | * to allow the module to self-identify its version: `myproject.__version__`
69 | * to choose a name and prefix for a 'setup.py sdist' tarball
70 |
71 | ## Theory of Operation
72 |
73 | Versioneer works by adding a special `_version.py` file into your source
74 | tree, where your `__init__.py` can import it. This `_version.py` knows how to
75 | dynamically ask the VCS tool for version information at import time.
76 |
77 | `_version.py` also contains `$Revision$` markers, and the installation
78 | process marks `_version.py` to have this marker rewritten with a tag name
79 | during the `git archive` command. As a result, generated tarballs will
80 | contain enough information to get the proper version.
81 |
82 | To allow `setup.py` to compute a version too, a `versioneer.py` is added to
83 | the top level of your source tree, next to `setup.py` and the `setup.cfg`
84 | that configures it. This overrides several distutils/setuptools commands to
85 | compute the version when invoked, and changes `setup.py build` and `setup.py
86 | sdist` to replace `_version.py` with a small static file that contains just
87 | the generated version data.
88 |
89 | ## Installation
90 |
91 | See [INSTALL.md](./INSTALL.md) for detailed installation instructions.
92 |
93 | ## Version-String Flavors
94 |
95 | Code which uses Versioneer can learn about its version string at runtime by
96 | importing `_version` from your main `__init__.py` file and running the
97 | `get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
98 | import the top-level `versioneer.py` and run `get_versions()`.
99 |
100 | Both functions return a dictionary with different flavors of version
101 | information:
102 |
103 | * `['version']`: A condensed version string, rendered using the selected
104 | style. This is the most commonly used value for the project's version
105 | string. The default "pep440" style yields strings like `0.11`,
106 | `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
107 | below for alternative styles.
108 |
109 | * `['full-revisionid']`: detailed revision identifier. For Git, this is the
110 | full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".
111 |
112 | * `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the
113 | commit date in ISO 8601 format. This will be None if the date is not
114 | available.
115 |
116 | * `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
117 | this is only accurate if run in a VCS checkout, otherwise it is likely to
118 | be False or None
119 |
120 | * `['error']`: if the version string could not be computed, this will be set
121 | to a string describing the problem, otherwise it will be None. It may be
122 | useful to throw an exception in setup.py if this is set, to avoid e.g.
123 | creating tarballs with a version string of "unknown".
124 |
125 | Some variants are more useful than others. Including `full-revisionid` in a
126 | bug report should allow developers to reconstruct the exact code being tested
127 | (or indicate the presence of local changes that should be shared with the
128 | developers). `version` is suitable for display in an "about" box or a CLI
129 | `--version` output: it can be easily compared against release notes and lists
130 | of bugs fixed in various releases.
131 |
132 | The installer adds the following text to your `__init__.py` to place a basic
133 | version in `YOURPROJECT.__version__`:
134 |
135 | from ._version import get_versions
136 | __version__ = get_versions()['version']
137 | del get_versions
138 |
139 | ## Styles
140 |
141 | The setup.cfg `style=` configuration controls how the VCS information is
142 | rendered into a version string.
143 |
144 | The default style, "pep440", produces a PEP440-compliant string, equal to the
145 | un-prefixed tag name for actual releases, and containing an additional "local
146 | version" section with more detail for in-between builds. For Git, this is
147 | TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
148 | --dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
149 | tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
150 | that this commit is two revisions ("+2") beyond the "0.11" tag. For released
151 | software (exactly equal to a known tag), the identifier will only contain the
152 | stripped tag, e.g. "0.11".
153 |
154 | Other styles are available. See [details.md](details.md) in the Versioneer
155 | source tree for descriptions.
156 |
157 | ## Debugging
158 |
159 | Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
160 | to return a version of "0+unknown". To investigate the problem, run `setup.py
161 | version`, which will run the version-lookup code in a verbose mode, and will
162 | display the full contents of `get_versions()` (including the `error` string,
163 | which may help identify what went wrong).
164 |
165 | ## Known Limitations
166 |
167 | Some situations are known to cause problems for Versioneer. This details the
168 | most significant ones. More can be found on Github
169 | [issues page](https://github.com/warner/python-versioneer/issues).
170 |
171 | ### Subprojects
172 |
173 | Versioneer has limited support for source trees in which `setup.py` is not in
174 | the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
175 | two common reasons why `setup.py` might not be in the root:
176 |
177 | * Source trees which contain multiple subprojects, such as
178 | [Buildbot](https://github.com/buildbot/buildbot), which contains both
179 | "master" and "slave" subprojects, each with their own `setup.py`,
180 | `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
181 | distributions (and upload multiple independently-installable tarballs).
182 | * Source trees whose main purpose is to contain a C library, but which also
183 | provide bindings to Python (and perhaps other languages) in subdirectories.
184 |
185 | Versioneer will look for `.git` in parent directories, and most operations
186 | should get the right version string. However `pip` and `setuptools` have bugs
187 | and implementation details which frequently cause `pip install .` from a
188 | subproject directory to fail to find a correct version string (so it usually
189 | defaults to `0+unknown`).
190 |
191 | `pip install --editable .` should work correctly. `setup.py install` might
192 | work too.
193 |
194 | Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
195 | some later version.
196 |
197 | [Bug #38](https://github.com/warner/python-versioneer/issues/38) is tracking
198 | this issue. The discussion in
199 | [PR #61](https://github.com/warner/python-versioneer/pull/61) describes the
200 | issue from the Versioneer side in more detail.
201 | [pip PR#3176](https://github.com/pypa/pip/pull/3176) and
202 | [pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
203 | pip to let Versioneer work correctly.
204 |
205 | Versioneer-0.16 and earlier only looked for a `.git` directory next to the
206 | `setup.cfg`, so subprojects were completely unsupported with those releases.
207 |
208 | ### Editable installs with setuptools <= 18.5
209 |
210 | `setup.py develop` and `pip install --editable .` allow you to install a
211 | project into a virtualenv once, then continue editing the source code (and
212 | test) without re-installing after every change.
213 |
214 | "Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
215 | convenient way to specify executable scripts that should be installed along
216 | with the python package.
217 |
218 | These both work as expected when using modern setuptools. When using
219 | setuptools-18.5 or earlier, however, certain operations will cause
220 | `pkg_resources.DistributionNotFound` errors when running the entrypoint
221 | script, which must be resolved by re-installing the package. This happens
222 | when the install happens with one version, then the egg_info data is
223 | regenerated while a different version is checked out. Many setup.py commands
224 | cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
225 | a different virtualenv), so this can be surprising.
226 |
227 | [Bug #83](https://github.com/warner/python-versioneer/issues/83) describes
228 | this one, but upgrading to a newer version of setuptools should probably
229 | resolve it.
230 |
231 | ### Unicode version strings
232 |
233 | While Versioneer works (and is continually tested) with both Python 2 and
234 | Python 3, it is not entirely consistent with bytes-vs-unicode distinctions.
235 | Newer releases probably generate unicode version strings on py2. It's not
236 | clear that this is wrong, but it may be surprising for applications that then
237 | write these strings to a network connection or include them in bytes-oriented
238 | APIs like cryptographic checksums.
239 |
240 | [Bug #71](https://github.com/warner/python-versioneer/issues/71) investigates
241 | this question.
242 |
243 |
244 | ## Updating Versioneer
245 |
246 | To upgrade your project to a new release of Versioneer, do the following:
247 |
248 | * install the new Versioneer (`pip install -U versioneer` or equivalent)
249 | * edit `setup.cfg`, if necessary, to include any new configuration settings
250 | indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
251 | * re-run `versioneer install` in your source tree, to replace
252 | `SRC/_version.py`
253 | * commit any changed files
254 |
255 | ## Future Directions
256 |
257 | This tool is designed to be easily extended to other version-control
258 | systems: all VCS-specific components are in separate directories like
259 | src/git/ . The top-level `versioneer.py` script is assembled from these
260 | components by running make-versioneer.py . In the future, make-versioneer.py
261 | will take a VCS name as an argument, and will construct a version of
262 | `versioneer.py` that is specific to the given VCS. It might also take the
263 | configuration arguments that are currently provided manually during
264 | installation by editing setup.py . Alternatively, it might go the other
265 | direction and include code from all supported VCS systems, reducing the
266 | number of intermediate scripts.
267 |
268 |
269 | ## License
270 |
271 | To make Versioneer easier to embed, all its code is dedicated to the public
272 | domain. The `_version.py` that it creates is also in the public domain.
273 | Specifically, both are released under the Creative Commons "Public Domain
274 | Dedication" license (CC0-1.0), as described in
275 | https://creativecommons.org/publicdomain/zero/1.0/ .
276 |
277 | """
278 |
279 | from __future__ import print_function
280 | try:
281 | import configparser
282 | except ImportError:
283 | import ConfigParser as configparser
284 | import errno
285 | import json
286 | import os
287 | import re
288 | import subprocess
289 | import sys
290 |
291 |
292 | class VersioneerConfig:
293 | """Container for Versioneer configuration parameters."""
294 |
295 |
296 | def get_root():
297 | """Get the project root directory.
298 |
299 | We require that all commands are run from the project root, i.e. the
300 | directory that contains setup.py, setup.cfg, and versioneer.py .
301 | """
302 | root = os.path.realpath(os.path.abspath(os.getcwd()))
303 | setup_py = os.path.join(root, "setup.py")
304 | versioneer_py = os.path.join(root, "versioneer.py")
305 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
306 | # allow 'python path/to/setup.py COMMAND'
307 | root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
308 | setup_py = os.path.join(root, "setup.py")
309 | versioneer_py = os.path.join(root, "versioneer.py")
310 | if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
311 | err = ("Versioneer was unable to run the project root directory. "
312 | "Versioneer requires setup.py to be executed from "
313 | "its immediate directory (like 'python setup.py COMMAND'), "
314 | "or in a way that lets it use sys.argv[0] to find the root "
315 | "(like 'python path/to/setup.py COMMAND').")
316 | raise VersioneerBadRootError(err)
317 | try:
318 | # Certain runtime workflows (setup.py install/develop in a setuptools
319 | # tree) execute all dependencies in a single python process, so
320 | # "versioneer" may be imported multiple times, and python's shared
321 | # module-import table will cache the first one. So we can't use
322 | # os.path.dirname(__file__), as that will find whichever
323 | # versioneer.py was first imported, even in later projects.
324 | me = os.path.realpath(os.path.abspath(__file__))
325 | me_dir = os.path.normcase(os.path.splitext(me)[0])
326 | vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
327 | if me_dir != vsr_dir:
328 | print("Warning: build in %s is using versioneer.py from %s"
329 | % (os.path.dirname(me), versioneer_py))
330 | except NameError:
331 | pass
332 | return root
333 |
334 |
335 | def get_config_from_root(root):
336 | """Read the project setup.cfg file to determine Versioneer config."""
337 | # This might raise EnvironmentError (if setup.cfg is missing), or
338 | # configparser.NoSectionError (if it lacks a [versioneer] section), or
339 | # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
340 | # the top of versioneer.py for instructions on writing your setup.cfg .
341 | setup_cfg = os.path.join(root, "setup.cfg")
342 | parser = configparser.SafeConfigParser()
343 | with open(setup_cfg, "r") as f:
344 | parser.readfp(f)
345 | VCS = parser.get("versioneer", "VCS") # mandatory
346 |
347 | def get(parser, name):
348 | if parser.has_option("versioneer", name):
349 | return parser.get("versioneer", name)
350 | return None
351 | cfg = VersioneerConfig()
352 | cfg.VCS = VCS
353 | cfg.style = get(parser, "style") or ""
354 | cfg.versionfile_source = get(parser, "versionfile_source")
355 | cfg.versionfile_build = get(parser, "versionfile_build")
356 | cfg.tag_prefix = get(parser, "tag_prefix")
357 | if cfg.tag_prefix in ("''", '""'):
358 | cfg.tag_prefix = ""
359 | cfg.parentdir_prefix = get(parser, "parentdir_prefix")
360 | cfg.verbose = get(parser, "verbose")
361 | return cfg
362 |
363 |
364 | class NotThisMethod(Exception):
365 | """Exception raised if a method is not valid for the current scenario."""
366 |
367 |
368 | # these dictionaries contain VCS-specific tools
369 | LONG_VERSION_PY = {}
370 | HANDLERS = {}
371 |
372 |
373 | def register_vcs_handler(vcs, method): # decorator
374 | """Decorator to mark a method as the handler for a particular VCS."""
375 | def decorate(f):
376 | """Store f in HANDLERS[vcs][method]."""
377 | if vcs not in HANDLERS:
378 | HANDLERS[vcs] = {}
379 | HANDLERS[vcs][method] = f
380 | return f
381 | return decorate
382 |
383 |
384 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
385 | env=None):
386 | """Call the given command(s)."""
387 | assert isinstance(commands, list)
388 | p = None
389 | for c in commands:
390 | try:
391 | dispcmd = str([c] + args)
392 | # remember shell=False, so use git.cmd on windows, not just git
393 | p = subprocess.Popen([c] + args, cwd=cwd, env=env,
394 | stdout=subprocess.PIPE,
395 | stderr=(subprocess.PIPE if hide_stderr
396 | else None))
397 | break
398 | except EnvironmentError:
399 | e = sys.exc_info()[1]
400 | if e.errno == errno.ENOENT:
401 | continue
402 | if verbose:
403 | print("unable to run %s" % dispcmd)
404 | print(e)
405 | return None, None
406 | else:
407 | if verbose:
408 | print("unable to find command, tried %s" % (commands,))
409 | return None, None
410 | stdout = p.communicate()[0].strip()
411 | if sys.version_info[0] >= 3:
412 | stdout = stdout.decode()
413 | if p.returncode != 0:
414 | if verbose:
415 | print("unable to run %s (error)" % dispcmd)
416 | print("stdout was %s" % stdout)
417 | return None, p.returncode
418 | return stdout, p.returncode
419 |
420 |
421 | LONG_VERSION_PY['git'] = '''
422 | # This file helps to compute a version number in source trees obtained from
423 | # git-archive tarball (such as those provided by githubs download-from-tag
424 | # feature). Distribution tarballs (built by setup.py sdist) and build
425 | # directories (produced by setup.py build) will contain a much shorter file
426 | # that just contains the computed version number.
427 |
428 | # This file is released into the public domain. Generated by
429 | # versioneer-0.18 (https://github.com/warner/python-versioneer)
430 |
431 | """Git implementation of _version.py."""
432 |
433 | import errno
434 | import os
435 | import re
436 | import subprocess
437 | import sys
438 |
439 |
440 | def get_keywords():
441 | """Get the keywords needed to look up the version information."""
442 | # these strings will be replaced by git during git-archive.
443 | # setup.py/versioneer.py will grep for the variable names, so they must
444 | # each be defined on a line of their own. _version.py will just call
445 | # get_keywords().
446 | git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s"
447 | git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s"
448 | git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s"
449 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
450 | return keywords
451 |
452 |
453 | class VersioneerConfig:
454 | """Container for Versioneer configuration parameters."""
455 |
456 |
457 | def get_config():
458 | """Create, populate and return the VersioneerConfig() object."""
459 | # these strings are filled in when 'setup.py versioneer' creates
460 | # _version.py
461 | cfg = VersioneerConfig()
462 | cfg.VCS = "git"
463 | cfg.style = "%(STYLE)s"
464 | cfg.tag_prefix = "%(TAG_PREFIX)s"
465 | cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s"
466 | cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s"
467 | cfg.verbose = False
468 | return cfg
469 |
470 |
471 | class NotThisMethod(Exception):
472 | """Exception raised if a method is not valid for the current scenario."""
473 |
474 |
475 | LONG_VERSION_PY = {}
476 | HANDLERS = {}
477 |
478 |
479 | def register_vcs_handler(vcs, method): # decorator
480 | """Decorator to mark a method as the handler for a particular VCS."""
481 | def decorate(f):
482 | """Store f in HANDLERS[vcs][method]."""
483 | if vcs not in HANDLERS:
484 | HANDLERS[vcs] = {}
485 | HANDLERS[vcs][method] = f
486 | return f
487 | return decorate
488 |
489 |
490 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
491 | env=None):
492 | """Call the given command(s)."""
493 | assert isinstance(commands, list)
494 | p = None
495 | for c in commands:
496 | try:
497 | dispcmd = str([c] + args)
498 | # remember shell=False, so use git.cmd on windows, not just git
499 | p = subprocess.Popen([c] + args, cwd=cwd, env=env,
500 | stdout=subprocess.PIPE,
501 | stderr=(subprocess.PIPE if hide_stderr
502 | else None))
503 | break
504 | except EnvironmentError:
505 | e = sys.exc_info()[1]
506 | if e.errno == errno.ENOENT:
507 | continue
508 | if verbose:
509 | print("unable to run %%s" %% dispcmd)
510 | print(e)
511 | return None, None
512 | else:
513 | if verbose:
514 | print("unable to find command, tried %%s" %% (commands,))
515 | return None, None
516 | stdout = p.communicate()[0].strip()
517 | if sys.version_info[0] >= 3:
518 | stdout = stdout.decode()
519 | if p.returncode != 0:
520 | if verbose:
521 | print("unable to run %%s (error)" %% dispcmd)
522 | print("stdout was %%s" %% stdout)
523 | return None, p.returncode
524 | return stdout, p.returncode
525 |
526 |
527 | def versions_from_parentdir(parentdir_prefix, root, verbose):
528 | """Try to determine the version from the parent directory name.
529 |
530 | Source tarballs conventionally unpack into a directory that includes both
531 | the project name and a version string. We will also support searching up
532 | two directory levels for an appropriately named parent directory
533 | """
534 | rootdirs = []
535 |
536 | for i in range(3):
537 | dirname = os.path.basename(root)
538 | if dirname.startswith(parentdir_prefix):
539 | return {"version": dirname[len(parentdir_prefix):],
540 | "full-revisionid": None,
541 | "dirty": False, "error": None, "date": None}
542 | else:
543 | rootdirs.append(root)
544 | root = os.path.dirname(root) # up a level
545 |
546 | if verbose:
547 | print("Tried directories %%s but none started with prefix %%s" %%
548 | (str(rootdirs), parentdir_prefix))
549 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
550 |
551 |
552 | @register_vcs_handler("git", "get_keywords")
553 | def git_get_keywords(versionfile_abs):
554 | """Extract version information from the given file."""
555 | # the code embedded in _version.py can just fetch the value of these
556 | # keywords. When used from setup.py, we don't want to import _version.py,
557 | # so we do it with a regexp instead. This function is not used from
558 | # _version.py.
559 | keywords = {}
560 | try:
561 | f = open(versionfile_abs, "r")
562 | for line in f.readlines():
563 | if line.strip().startswith("git_refnames ="):
564 | mo = re.search(r'=\s*"(.*)"', line)
565 | if mo:
566 | keywords["refnames"] = mo.group(1)
567 | if line.strip().startswith("git_full ="):
568 | mo = re.search(r'=\s*"(.*)"', line)
569 | if mo:
570 | keywords["full"] = mo.group(1)
571 | if line.strip().startswith("git_date ="):
572 | mo = re.search(r'=\s*"(.*)"', line)
573 | if mo:
574 | keywords["date"] = mo.group(1)
575 | f.close()
576 | except EnvironmentError:
577 | pass
578 | return keywords
579 |
580 |
581 | @register_vcs_handler("git", "keywords")
582 | def git_versions_from_keywords(keywords, tag_prefix, verbose):
583 | """Get version information from git keywords."""
584 | if not keywords:
585 | raise NotThisMethod("no keywords at all, weird")
586 | date = keywords.get("date")
587 | if date is not None:
588 | # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant
589 | # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601
590 | # -like" string, which we must then edit to make compliant), because
591 | # it's been around since git-1.5.3, and it's too difficult to
592 | # discover which version we're using, or to work around using an
593 | # older one.
594 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
595 | refnames = keywords["refnames"].strip()
596 | if refnames.startswith("$Format"):
597 | if verbose:
598 | print("keywords are unexpanded, not using")
599 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
600 | refs = set([r.strip() for r in refnames.strip("()").split(",")])
601 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
602 | # just "foo-1.0". If we see a "tag: " prefix, prefer those.
603 | TAG = "tag: "
604 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
605 | if not tags:
606 | # Either we're using git < 1.8.3, or there really are no tags. We use
607 | # a heuristic: assume all version tags have a digit. The old git %%d
608 | # expansion behaves like git log --decorate=short and strips out the
609 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish
610 | # between branches and tags. By ignoring refnames without digits, we
611 | # filter out many common branch names like "release" and
612 | # "stabilization", as well as "HEAD" and "master".
613 | tags = set([r for r in refs if re.search(r'\d', r)])
614 | if verbose:
615 | print("discarding '%%s', no digits" %% ",".join(refs - tags))
616 | if verbose:
617 | print("likely tags: %%s" %% ",".join(sorted(tags)))
618 | for ref in sorted(tags):
619 | # sorting will prefer e.g. "2.0" over "2.0rc1"
620 | if ref.startswith(tag_prefix):
621 | r = ref[len(tag_prefix):]
622 | if verbose:
623 | print("picking %%s" %% r)
624 | return {"version": r,
625 | "full-revisionid": keywords["full"].strip(),
626 | "dirty": False, "error": None,
627 | "date": date}
628 | # no suitable tags, so version is "0+unknown", but full hex is still there
629 | if verbose:
630 | print("no suitable tags, using unknown + full revision id")
631 | return {"version": "0+unknown",
632 | "full-revisionid": keywords["full"].strip(),
633 | "dirty": False, "error": "no suitable tags", "date": None}
634 |
635 |
636 | @register_vcs_handler("git", "pieces_from_vcs")
637 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
638 | """Get version from 'git describe' in the root of the source tree.
639 |
640 | This only gets called if the git-archive 'subst' keywords were *not*
641 | expanded, and _version.py hasn't already been rewritten with a short
642 | version string, meaning we're inside a checked out source tree.
643 | """
644 | GITS = ["git"]
645 | if sys.platform == "win32":
646 | GITS = ["git.cmd", "git.exe"]
647 |
648 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
649 | hide_stderr=True)
650 | if rc != 0:
651 | if verbose:
652 | print("Directory %%s not under git control" %% root)
653 | raise NotThisMethod("'git rev-parse --git-dir' returned error")
654 |
655 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
656 | # if there isn't one, this yields HEX[-dirty] (no NUM)
657 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
658 | "--always", "--long",
659 | "--match", "%%s*" %% tag_prefix],
660 | cwd=root)
661 | # --long was added in git-1.5.5
662 | if describe_out is None:
663 | raise NotThisMethod("'git describe' failed")
664 | describe_out = describe_out.strip()
665 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
666 | if full_out is None:
667 | raise NotThisMethod("'git rev-parse' failed")
668 | full_out = full_out.strip()
669 |
670 | pieces = {}
671 | pieces["long"] = full_out
672 | pieces["short"] = full_out[:7] # maybe improved later
673 | pieces["error"] = None
674 |
675 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
676 | # TAG might have hyphens.
677 | git_describe = describe_out
678 |
679 | # look for -dirty suffix
680 | dirty = git_describe.endswith("-dirty")
681 | pieces["dirty"] = dirty
682 | if dirty:
683 | git_describe = git_describe[:git_describe.rindex("-dirty")]
684 |
685 | # now we have TAG-NUM-gHEX or HEX
686 |
687 | if "-" in git_describe:
688 | # TAG-NUM-gHEX
689 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
690 | if not mo:
691 | # unparseable. Maybe git-describe is misbehaving?
692 | pieces["error"] = ("unable to parse git-describe output: '%%s'"
693 | %% describe_out)
694 | return pieces
695 |
696 | # tag
697 | full_tag = mo.group(1)
698 | if not full_tag.startswith(tag_prefix):
699 | if verbose:
700 | fmt = "tag '%%s' doesn't start with prefix '%%s'"
701 | print(fmt %% (full_tag, tag_prefix))
702 | pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'"
703 | %% (full_tag, tag_prefix))
704 | return pieces
705 | pieces["closest-tag"] = full_tag[len(tag_prefix):]
706 |
707 | # distance: number of commits since tag
708 | pieces["distance"] = int(mo.group(2))
709 |
710 | # commit: short hex revision ID
711 | pieces["short"] = mo.group(3)
712 |
713 | else:
714 | # HEX: no tags
715 | pieces["closest-tag"] = None
716 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
717 | cwd=root)
718 | pieces["distance"] = int(count_out) # total number of commits
719 |
720 | # commit date: see ISO-8601 comment in git_versions_from_keywords()
721 | date = run_command(GITS, ["show", "-s", "--format=%%ci", "HEAD"],
722 | cwd=root)[0].strip()
723 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
724 |
725 | return pieces
726 |
727 |
728 | def plus_or_dot(pieces):
729 | """Return a + if we don't already have one, else return a ."""
730 | if "+" in pieces.get("closest-tag", ""):
731 | return "."
732 | return "+"
733 |
734 |
735 | def render_pep440(pieces):
736 | """Build up version string, with post-release "local version identifier".
737 |
738 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
739 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
740 |
741 | Exceptions:
742 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
743 | """
744 | if pieces["closest-tag"]:
745 | rendered = pieces["closest-tag"]
746 | if pieces["distance"] or pieces["dirty"]:
747 | rendered += plus_or_dot(pieces)
748 | rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"])
749 | if pieces["dirty"]:
750 | rendered += ".dirty"
751 | else:
752 | # exception #1
753 | rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"],
754 | pieces["short"])
755 | if pieces["dirty"]:
756 | rendered += ".dirty"
757 | return rendered
758 |
759 |
760 | def render_pep440_pre(pieces):
761 | """TAG[.post.devDISTANCE] -- No -dirty.
762 |
763 | Exceptions:
764 | 1: no tags. 0.post.devDISTANCE
765 | """
766 | if pieces["closest-tag"]:
767 | rendered = pieces["closest-tag"]
768 | if pieces["distance"]:
769 | rendered += ".post.dev%%d" %% pieces["distance"]
770 | else:
771 | # exception #1
772 | rendered = "0.post.dev%%d" %% pieces["distance"]
773 | return rendered
774 |
775 |
776 | def render_pep440_post(pieces):
777 | """TAG[.postDISTANCE[.dev0]+gHEX] .
778 |
779 | The ".dev0" means dirty. Note that .dev0 sorts backwards
780 | (a dirty tree will appear "older" than the corresponding clean one),
781 | but you shouldn't be releasing software with -dirty anyways.
782 |
783 | Exceptions:
784 | 1: no tags. 0.postDISTANCE[.dev0]
785 | """
786 | if pieces["closest-tag"]:
787 | rendered = pieces["closest-tag"]
788 | if pieces["distance"] or pieces["dirty"]:
789 | rendered += ".post%%d" %% pieces["distance"]
790 | if pieces["dirty"]:
791 | rendered += ".dev0"
792 | rendered += plus_or_dot(pieces)
793 | rendered += "g%%s" %% pieces["short"]
794 | else:
795 | # exception #1
796 | rendered = "0.post%%d" %% pieces["distance"]
797 | if pieces["dirty"]:
798 | rendered += ".dev0"
799 | rendered += "+g%%s" %% pieces["short"]
800 | return rendered
801 |
802 |
803 | def render_pep440_old(pieces):
804 | """TAG[.postDISTANCE[.dev0]] .
805 |
806 | The ".dev0" means dirty.
807 |
808 | Eexceptions:
809 | 1: no tags. 0.postDISTANCE[.dev0]
810 | """
811 | if pieces["closest-tag"]:
812 | rendered = pieces["closest-tag"]
813 | if pieces["distance"] or pieces["dirty"]:
814 | rendered += ".post%%d" %% pieces["distance"]
815 | if pieces["dirty"]:
816 | rendered += ".dev0"
817 | else:
818 | # exception #1
819 | rendered = "0.post%%d" %% pieces["distance"]
820 | if pieces["dirty"]:
821 | rendered += ".dev0"
822 | return rendered
823 |
824 |
825 | def render_git_describe(pieces):
826 | """TAG[-DISTANCE-gHEX][-dirty].
827 |
828 | Like 'git describe --tags --dirty --always'.
829 |
830 | Exceptions:
831 | 1: no tags. HEX[-dirty] (note: no 'g' prefix)
832 | """
833 | if pieces["closest-tag"]:
834 | rendered = pieces["closest-tag"]
835 | if pieces["distance"]:
836 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
837 | else:
838 | # exception #1
839 | rendered = pieces["short"]
840 | if pieces["dirty"]:
841 | rendered += "-dirty"
842 | return rendered
843 |
844 |
845 | def render_git_describe_long(pieces):
846 | """TAG-DISTANCE-gHEX[-dirty].
847 |
848 | Like 'git describe --tags --dirty --always -long'.
849 | The distance/hash is unconditional.
850 |
851 | Exceptions:
852 | 1: no tags. HEX[-dirty] (note: no 'g' prefix)
853 | """
854 | if pieces["closest-tag"]:
855 | rendered = pieces["closest-tag"]
856 | rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
857 | else:
858 | # exception #1
859 | rendered = pieces["short"]
860 | if pieces["dirty"]:
861 | rendered += "-dirty"
862 | return rendered
863 |
864 |
865 | def render(pieces, style):
866 | """Render the given version pieces into the requested style."""
867 | if pieces["error"]:
868 | return {"version": "unknown",
869 | "full-revisionid": pieces.get("long"),
870 | "dirty": None,
871 | "error": pieces["error"],
872 | "date": None}
873 |
874 | if not style or style == "default":
875 | style = "pep440" # the default
876 |
877 | if style == "pep440":
878 | rendered = render_pep440(pieces)
879 | elif style == "pep440-pre":
880 | rendered = render_pep440_pre(pieces)
881 | elif style == "pep440-post":
882 | rendered = render_pep440_post(pieces)
883 | elif style == "pep440-old":
884 | rendered = render_pep440_old(pieces)
885 | elif style == "git-describe":
886 | rendered = render_git_describe(pieces)
887 | elif style == "git-describe-long":
888 | rendered = render_git_describe_long(pieces)
889 | else:
890 | raise ValueError("unknown style '%%s'" %% style)
891 |
892 | return {"version": rendered, "full-revisionid": pieces["long"],
893 | "dirty": pieces["dirty"], "error": None,
894 | "date": pieces.get("date")}
895 |
896 |
897 | def get_versions():
898 | """Get version information or return default if unable to do so."""
899 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
900 | # __file__, we can work backwards from there to the root. Some
901 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
902 | # case we can only use expanded keywords.
903 |
904 | cfg = get_config()
905 | verbose = cfg.verbose
906 |
907 | try:
908 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
909 | verbose)
910 | except NotThisMethod:
911 | pass
912 |
913 | try:
914 | root = os.path.realpath(__file__)
915 | # versionfile_source is the relative path from the top of the source
916 | # tree (where the .git directory might live) to this file. Invert
917 | # this to find the root from __file__.
918 | for i in cfg.versionfile_source.split('/'):
919 | root = os.path.dirname(root)
920 | except NameError:
921 | return {"version": "0+unknown", "full-revisionid": None,
922 | "dirty": None,
923 | "error": "unable to find root of source tree",
924 | "date": None}
925 |
926 | try:
927 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
928 | return render(pieces, cfg.style)
929 | except NotThisMethod:
930 | pass
931 |
932 | try:
933 | if cfg.parentdir_prefix:
934 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
935 | except NotThisMethod:
936 | pass
937 |
938 | return {"version": "0+unknown", "full-revisionid": None,
939 | "dirty": None,
940 | "error": "unable to compute version", "date": None}
941 | '''
942 |
943 |
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract the git keyword values from the given version file.

    The file is scanned line by line for assignments to ``git_refnames``,
    ``git_full`` and ``git_date`` whose value is a double-quoted string.

    Returns a dict that may contain the keys "refnames", "full" and "date";
    a key is absent when no matching line was found. An unreadable file
    yields an empty dict.
    """
    # the code embedded in _version.py can just fetch the value of these
    # keywords. When used from setup.py, we don't want to import _version.py,
    # so we do it with a regexp instead. This function is not used from
    # _version.py.
    wanted = (
        ("git_refnames =", "refnames"),
        ("git_full =", "full"),
        ("git_date =", "date"),
    )
    keywords = {}
    try:
        # "with" closes the handle even when reading raises something other
        # than EnvironmentError (e.g. a decoding error).
        with open(versionfile_abs, "r") as f:
            for line in f.readlines():
                stripped = line.strip()
                for prefix, key in wanted:
                    if stripped.startswith(prefix):
                        mo = re.search(r'=\s*"(.*)"', line)
                        if mo:
                            keywords[key] = mo.group(1)
    except EnvironmentError:
        # best effort: a missing/unreadable file simply means "no keywords"
        pass
    return keywords
971 |
972 |
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    ``keywords`` is a dict with the keys "refnames" and, optionally, "full"
    and "date". Returns a dict with the keys "version", "full-revisionid",
    "dirty", "error" and "date".

    Raises NotThisMethod when there are no keywords, when "refnames" is
    missing, or when the keywords were never expanded (they still start
    with "$Format").
    """
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    if "refnames" not in keywords:
        # a partial keyword dict cannot be interpreted; let the caller fall
        # back to another method instead of failing with KeyError
        raise NotThisMethod("no refnames keyword, cannot determine version")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    # tolerate a missing full revision id rather than raising KeyError
    full = keywords.get("full")
    if full is not None:
        full = full.strip()
    refs = set(r.strip() for r in refnames.strip("()").split(","))
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = set(r[len(TAG):] for r in refs if r.startswith(TAG))
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = set(r for r in refs if re.search(r'\d', r))
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": full,
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": full,
            "dirty": False, "error": "no suitable tags", "date": None}
1026 |
1027 |
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Returns a dict of "pieces" with the keys "long", "short", "error" and
    "dirty", plus "closest-tag", "distance" and "date" when the describe
    output could be parsed. When parsing fails, "error" is set and the
    partially filled dict is returned.

    Raises NotThisMethod when ``root`` is not under git control or when a
    required git command produced no output.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    _, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
                        hide_stderr=True)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
                                          "--always", "--long",
                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparseable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
                                    cwd=root)
        # same None check as for describe/rev-parse above, so a failed
        # command triggers the caller's fallback instead of a TypeError
        if count_out is None:
            raise NotThisMethod("'git rev-list' failed")
        pieces["distance"] = int(count_out)  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date_out, rc = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
                               cwd=root)
    if date_out is None:
        # the date is optional information; keep the version we already have
        pieces["date"] = None
    else:
        pieces["date"] = (date_out.strip()
                          .replace(" ", "T", 1).replace(" ", "", 1))

    return pieces
1118 |
1119 |
def do_vcs_install(manifest_in, versionfile_source, ipy):
    """Git-specific installation logic for Versioneer.

    For Git, this means creating/changing .gitattributes to mark _version.py
    for export-subst keyword substitution.

    ``manifest_in``, ``versionfile_source`` and (when truthy) ``ipy`` are
    passed to 'git add', together with this script and, if it had to be
    modified, ``.gitattributes`` in the current working directory.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]
    files = [manifest_in, versionfile_source]
    if ipy:
        files.append(ipy)
    try:
        me = __file__
        # map a compiled module name back to its source file
        if me.endswith(".pyc") or me.endswith(".pyo"):
            me = os.path.splitext(me)[0] + ".py"
        versioneer_file = os.path.relpath(me)
    except NameError:
        versioneer_file = "versioneer.py"
    files.append(versioneer_file)
    present = False
    try:
        # context manager: the handle is released even if reading fails
        with open(".gitattributes", "r") as f:
            for line in f.readlines():
                if line.strip().startswith(versionfile_source):
                    if "export-subst" in line.strip().split()[1:]:
                        present = True
    except EnvironmentError:
        # no readable .gitattributes yet: treat the entry as absent
        pass
    if not present:
        with open(".gitattributes", "a+") as f:
            f.write("%s export-subst\n" % versionfile_source)
        files.append(".gitattributes")
    run_command(GITS, ["add", "--"] + files)
1156 |
1157 |
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Derive the version from the name of ``root`` or one of its parents.

    Unpacked source tarballs usually live in a directory named
    PREFIX + VERSION. ``root`` itself and the two directories above it are
    examined; the first whose basename starts with ``parentdir_prefix``
    supplies the version (the remainder of the name).

    Raises NotThisMethod when none of the three candidates matches.
    """
    tried = []
    candidate = root
    remaining = 3

    while remaining > 0:
        remaining -= 1
        name = os.path.basename(candidate)
        if name.startswith(parentdir_prefix):
            return {"version": name[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level

    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
1181 |
1182 |
# Template for the short, pre-generated _version.py: the single %s is replaced
# by a JSON document, and the "END VERSION_JSON" marker lets
# versions_from_file() locate that document again with a regular expression.
# The body of get_versions() must be indented, otherwise every generated file
# would fail to import with a SyntaxError.
SHORT_VERSION_PY = """
# This file was generated by 'versioneer.py' (0.18) from
# revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file.

import json

version_json = '''
%s
''' # END VERSION_JSON


def get_versions():
    return json.loads(version_json)
"""
1199 |
1200 |
def versions_from_file(filename):
    """Load the version dict stored in a generated _version.py.

    The JSON document between the ``version_json = '''`` line and the
    ``END VERSION_JSON`` marker is extracted (LF line endings are tried
    first, then CRLF) and decoded.

    Raises NotThisMethod when the file cannot be read or holds no such
    document.
    """
    try:
        with open(filename) as handle:
            text = handle.read()
    except EnvironmentError:
        raise NotThisMethod("unable to read _version.py")

    patterns = (
        r"version_json = '''\n(.*)''' # END VERSION_JSON",
        r"version_json = '''\r\n(.*)''' # END VERSION_JSON",
    )
    for pattern in patterns:
        found = re.search(pattern, text, re.M | re.S)
        if found:
            return json.loads(found.group(1))
    raise NotThisMethod("no version_json in _version.py")
1216 |
1217 |
def write_to_version_file(filename, versions):
    """Replace ``filename`` with a short _version.py holding ``versions``.

    The existing file is removed first, then ``versions`` is serialized as
    JSON into the SHORT_VERSION_PY template and a confirmation line is
    printed.
    """
    os.unlink(filename)
    serialized = json.dumps(versions, indent=1, sort_keys=True,
                            separators=(",", ": "))
    with open(filename, "w") as out:
        out.write(SHORT_VERSION_PY % serialized)

    message = "set %s to '%s'" % (filename, versions["version"])
    print(message)
1227 |
1228 |
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    ``pieces["closest-tag"]`` may be missing or None (no tag found); both
    cases are treated as an empty tag, so "+" is returned.
    """
    # dict.get's default only covers a missing key, not an explicit None
    closest_tag = pieces.get("closest-tag") or ""
    if "+" in closest_tag:
        return "."
    return "+"
1234 |
1235 |
def render_pep440(pieces):
    """Render as TAG[+DISTANCE.gHEX[.dirty]] (PEP 440 local version).

    A tagged build that was modified afterwards yields TAG+0.gHEX.dirty.

    Exceptions:
    1: no tags. The result is 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        version = "0+untagged.%d.g%s" % (pieces["distance"],
                                         pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
        return version

    version = tag
    if pieces["distance"] or pieces["dirty"]:
        version += plus_or_dot(pieces)
        version += "%d.g%s" % (pieces["distance"], pieces["short"])
        if pieces["dirty"]:
            version += ".dirty"
    return version
1259 |
1260 |
def render_pep440_pre(pieces):
    """Render as TAG[.post.devDISTANCE]; the dirty flag is ignored.

    Exceptions:
    1: no tags. The result is 0.post.devDISTANCE
    """
    tag = pieces["closest-tag"]
    distance = pieces["distance"]
    if not tag:
        # exception #1
        return "0.post.dev%d" % distance
    if distance:
        return tag + ".post.dev%d" % distance
    return tag
1275 |
1276 |
def render_pep440_post(pieces):
    """Render as TAG[.postDISTANCE[.dev0]+gHEX].

    ".dev0" marks a dirty tree. Note that .dev0 sorts backwards (a dirty
    tree appears "older" than the corresponding clean one), but dirty
    trees should not be released anyway.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]+gHEX
    """
    tag = pieces["closest-tag"]
    if tag:
        parts = [tag]
        if pieces["distance"] or pieces["dirty"]:
            parts.append(".post%d" % pieces["distance"])
            if pieces["dirty"]:
                parts.append(".dev0")
            parts.append(plus_or_dot(pieces))
            parts.append("g%s" % pieces["short"])
    else:
        # exception #1
        parts = ["0.post%d" % pieces["distance"]]
        if pieces["dirty"]:
            parts.append(".dev0")
        parts.append("+g%s" % pieces["short"])
    return "".join(parts)
1302 |
1303 |
def render_pep440_old(pieces):
    """Render as TAG[.postDISTANCE[.dev0]].

    ".dev0" marks a dirty tree.

    Exceptions:
    1: no tags. The result is 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        if not (pieces["distance"] or pieces["dirty"]):
            # exactly on a tag with a clean tree: the tag is the version
            return tag
        base = tag + ".post%d" % pieces["distance"]
    else:
        # exception #1
        base = "0.post%d" % pieces["distance"]
    return (base + ".dev0") if pieces["dirty"] else base
1324 |
1325 |
def render_git_describe(pieces):
    """Render as TAG[-DISTANCE-gHEX][-dirty].

    Mirrors the output of 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        described = pieces["short"]
    elif pieces["distance"]:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        described = tag
    suffix = "-dirty" if pieces["dirty"] else ""
    return described + suffix
1344 |
1345 |
def render_git_describe_long(pieces):
    """Render as TAG-DISTANCE-gHEX[-dirty].

    Mirrors 'git describe --tags --dirty --always --long': the distance and
    hash are always included when a tag exists.

    Exceptions:
    1: no tags. The result is HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        described = tag + "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        described = pieces["short"]
    suffix = "-dirty" if pieces["dirty"] else ""
    return described + suffix
1364 |
1365 |
def render(pieces, style):
    """Turn version pieces into a version dict using the requested style.

    When ``pieces["error"]`` is set, a dict with version "unknown" and that
    error is returned without rendering. An empty style or "default" means
    "pep440".

    Raises ValueError for an unrecognized style.
    """
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = (
        ("pep440", render_pep440),
        ("pep440-pre", render_pep440_pre),
        ("pep440-post", render_pep440_post),
        ("pep440-old", render_pep440_old),
        ("git-describe", render_git_describe),
        ("git-describe-long", render_git_describe_long),
    )
    for name, renderer in renderers:
        if style == name:
            rendered = renderer(pieces)
            break
    else:
        raise ValueError("unknown style '%s'" % style)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
1396 |
1397 |
class VersioneerBadRootError(Exception):
    """Signal that the project root is unknown or lacks key files."""

    pass
1400 |
1401 |
def get_versions(verbose=False):
    """Get the project version from whatever source is available.

    The sources are tried in this order: expanded VCS keywords in the
    version file, a previously generated version file, the VCS itself and
    finally the parent directory name. The first one that does not raise
    NotThisMethod wins.

    Returns a version dict; the fallback result carries the keys "version",
    "full-revisionid", "dirty", "error" and "date".
    """
    # see the discussion in cmdclass.py:get_cmdclass()
    sys.modules.pop("versioneer", None)

    root = get_root()
    cfg = get_config_from_root(root)

    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
    handlers = HANDLERS.get(cfg.VCS)
    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
    verbose = verbose or cfg.verbose
    assert cfg.versionfile_source is not None, \
        "please set versioneer.versionfile_source"
    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"

    versionfile_abs = os.path.join(root, cfg.versionfile_source)

    # extract version from first of: _version.py, VCS command (e.g. 'git
    # describe'), parentdir. This is meant to work for developers using a
    # source checkout, for users of a tarball created by 'setup.py sdist',
    # and for users of a tarball/zipball created by 'git archive' or github's
    # download-from-tag feature or the equivalent in other VCSes.

    read_keywords = handlers.get("get_keywords")
    parse_keywords = handlers.get("keywords")
    if read_keywords and parse_keywords:
        try:
            found = parse_keywords(read_keywords(versionfile_abs),
                                   cfg.tag_prefix, verbose)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from expanded keyword %s" % found)
            return found

    try:
        found = versions_from_file(versionfile_abs)
    except NotThisMethod:
        pass
    else:
        if verbose:
            print("got version from file %s %s" % (versionfile_abs, found))
        return found

    pieces_from_vcs = handlers.get("pieces_from_vcs")
    if pieces_from_vcs:
        try:
            found = render(pieces_from_vcs(cfg.tag_prefix, root, verbose),
                           cfg.style)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from VCS %s" % found)
            return found

    if cfg.parentdir_prefix:
        try:
            found = versions_from_parentdir(cfg.parentdir_prefix, root,
                                            verbose)
        except NotThisMethod:
            pass
        else:
            if verbose:
                print("got version from parentdir %s" % found)
            return found

    if verbose:
        print("unable to compute version")

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None, "error": "unable to compute version",
            "date": None}
1476 |
1477 |
def get_version():
    """Return only the "version" entry of get_versions()."""
    versions = get_versions()
    return versions["version"]
1481 |
1482 |
def get_cmdclass():
    """Get the custom setuptools/distutils subclasses used by Versioneer.

    Returns:
        A dict mapping command names to command classes, meant to be passed
        as ``cmdclass`` to ``setup()``. It always holds "version" and
        "sdist". "build_py" is present unless cx_Freeze is loaded, in which
        case "build_exe" takes its place. "py2exe" is added when py2exe is
        loaded.

    Raises:
        ImportError: if neither distutils nor setuptools can be imported.
    """
    if "versioneer" in sys.modules:
        del sys.modules["versioneer"]
        # this fixes the "python setup.py develop" case (also 'install' and
        # 'easy_install .'), in which subdependencies of the main project are
        # built (using setup.py bdist_egg) in the same python process. Assume
        # a main project A and a dependency B, which use different versions
        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
        # sys.modules by the time B's setup.py is executed, causing B to run
        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
        # sandbox that restores sys.modules to its pre-build state, so the
        # parent is protected against the child's "import versioneer". By
        # removing ourselves from sys.modules here, before the child build
        # happens, we protect the child from the parent's versioneer too.
        # Also see https://github.com/warner/python-versioneer/issues/52

    cmds = {}

    # we add "version" to both distutils and setuptools
    try:
        from distutils.core import Command
    except ImportError:
        # distutils is no longer part of the standard library on recent
        # Pythons (PEP 632); setuptools exports a compatible Command base.
        # Importing it here also puts "setuptools" into sys.modules, so the
        # setuptools flavours of build_py/sdist are picked below.
        from setuptools import Command

    class cmd_version(Command):
        description = "report generated version string"
        user_options = []
        boolean_options = []

        def initialize_options(self):
            pass

        def finalize_options(self):
            pass

        def run(self):
            vers = get_versions(verbose=True)
            print("Version: %s" % vers["version"])
            print(" full-revisionid: %s" % vers.get("full-revisionid"))
            print(" dirty: %s" % vers.get("dirty"))
            print(" date: %s" % vers.get("date"))
            # .get() like the lookups above, so a versions dict without an
            # "error" key does not abort the report
            if vers.get("error"):
                print(" error: %s" % vers["error"])
    cmds["version"] = cmd_version

    # we override "build_py" in both distutils and setuptools
    #
    # most invocation pathways end up running build_py:
    #  distutils/build -> build_py
    #  distutils/install -> distutils/build ->..
    #  setuptools/bdist_wheel -> distutils/install ->..
    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
    #  setuptools/install -> bdist_egg ->..
    #  setuptools/develop -> ?
    #  pip install:
    #   copies source tree to a tempdir before running egg_info/etc
    #   if .git isn't copied too, 'git describe' will fail
    #   then does setup.py bdist_wheel, or sometimes setup.py install
    #  setup.py egg_info -> ?

    # we override different "build_py" commands for both environments
    if "setuptools" in sys.modules:
        from setuptools.command.build_py import build_py as _build_py
    else:
        from distutils.command.build_py import build_py as _build_py

    class cmd_build_py(_build_py):
        def run(self):
            root = get_root()
            cfg = get_config_from_root(root)
            versions = get_versions()
            _build_py.run(self)
            # now locate _version.py in the new build/ directory and replace
            # it with an updated value
            if cfg.versionfile_build:
                target_versionfile = os.path.join(self.build_lib,
                                                  cfg.versionfile_build)
                print("UPDATING %s" % target_versionfile)
                write_to_version_file(target_versionfile, versions)
    cmds["build_py"] = cmd_build_py

    def _run_with_static_version(base_run, command):
        """Run base_run(command) while versionfile_source holds the
        computed static version, then put the templated file back."""
        root = get_root()
        cfg = get_config_from_root(root)
        versions = get_versions()
        target_versionfile = cfg.versionfile_source
        print("UPDATING %s" % target_versionfile)
        write_to_version_file(target_versionfile, versions)
        try:
            base_run(command)
        finally:
            # restore the templated _version.py even when the build fails,
            # so the source tree is never left holding the static stand-in
            os.unlink(target_versionfile)
            with open(cfg.versionfile_source, "w") as f:
                LONG = LONG_VERSION_PY[cfg.VCS]
                f.write(LONG %
                        {"DOLLAR": "$",
                         "STYLE": cfg.style,
                         "TAG_PREFIX": cfg.tag_prefix,
                         "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                         "VERSIONFILE_SOURCE": cfg.versionfile_source,
                         })

    if "cx_Freeze" in sys.modules:  # cx_freeze enabled?
        from cx_Freeze.dist import build_exe as _build_exe
        # nczeczulin reports that py2exe won't like the pep440-style string
        # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
        # setup(console=[{
        #   "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION
        #   "product_version": versioneer.get_version(),
        #   ...

        class cmd_build_exe(_build_exe):
            def run(self):
                _run_with_static_version(_build_exe.run, self)
        cmds["build_exe"] = cmd_build_exe
        del cmds["build_py"]

    if 'py2exe' in sys.modules:  # py2exe enabled?
        try:
            from py2exe.distutils_buildexe import py2exe as _py2exe  # py3
        except ImportError:
            from py2exe.build_exe import py2exe as _py2exe  # py2

        class cmd_py2exe(_py2exe):
            def run(self):
                _run_with_static_version(_py2exe.run, self)
        cmds["py2exe"] = cmd_py2exe

    # we override different "sdist" commands for both environments
    if "setuptools" in sys.modules:
        from setuptools.command.sdist import sdist as _sdist
    else:
        from distutils.command.sdist import sdist as _sdist

    class cmd_sdist(_sdist):
        def run(self):
            versions = get_versions()
            # remembered for make_release_tree(), which writes it out
            self._versioneer_generated_versions = versions
            # unless we update this, the command will keep using the old
            # version
            self.distribution.metadata.version = versions["version"]
            return _sdist.run(self)

        def make_release_tree(self, base_dir, files):
            root = get_root()
            cfg = get_config_from_root(root)
            _sdist.make_release_tree(self, base_dir, files)
            # now locate _version.py in the new base_dir directory
            # (remembering that it may be a hardlink) and replace it with an
            # updated value
            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
            print("UPDATING %s" % target_versionfile)
            write_to_version_file(target_versionfile,
                                  self._versioneer_generated_versions)
    cmds["sdist"] = cmd_sdist

    return cmds
1651 |
1652 |
# Help text that do_setup() prints to stderr when setup.cfg does not provide
# a usable [versioneer] configuration.
CONFIG_ERROR = """
setup.cfg is missing the necessary Versioneer configuration. You need
a section like:

[versioneer]
VCS = git
style = pep440
versionfile_source = src/myproject/_version.py
versionfile_build = myproject/_version.py
tag_prefix =
parentdir_prefix = myproject-

You will also need to edit your setup.py to use the results:

import versioneer
setup(version=versioneer.get_version(),
cmdclass=versioneer.get_cmdclass(), ...)

Please read the docstring in ./versioneer.py for configuration instructions,
edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
"""
1674 |
# Commented-out [versioneer] section that do_setup() appends to setup.cfg
# when the file cannot be read or has no [versioneer] section.
SAMPLE_CONFIG = """
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.

[versioneer]
#VCS = git
#style = pep440
#versionfile_source =
#versionfile_build =
#tag_prefix =
#parentdir_prefix =

"""
1689 |
# Code that do_setup() appends to the __init__.py next to versionfile_source.
# do_setup() looks for this exact text to decide whether the file was already
# patched, so it must stay character-for-character stable.
INIT_PY_SNIPPET = """
from ._version import get_versions
__version__ = get_versions()['version']
del get_versions
"""
1695 |
1696 |
def do_setup():
    """Main VCS-independent setup function for installing Versioneer.

    Steps, in order:
      1. load the [versioneer] configuration from setup.cfg;
      2. (re)create ``versionfile_source`` from the LONG_VERSION_PY template
         of the configured VCS;
      3. append INIT_PY_SNIPPET to the ``__init__.py`` next to the version
         file, if that file exists and does not contain the snippet yet;
      4. make sure MANIFEST.in includes ``versioneer.py`` and the version
         file;
      5. hand over to do_vcs_install() for the VCS-specific changes.

    Returns:
        0 on success, 1 when the configuration could not be loaded. In the
        latter case CONFIG_ERROR is printed to stderr, and SAMPLE_CONFIG is
        appended to setup.cfg if the file or its [versioneer] section is
        missing.
    """
    root = get_root()
    try:
        cfg = get_config_from_root(root)
    except (EnvironmentError, configparser.NoSectionError,
            configparser.NoOptionError) as e:
        # A missing option means the section exists but is incomplete, so
        # only the other two cases get the sample section appended.
        if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
            print("Adding sample versioneer config to setup.cfg",
                  file=sys.stderr)
            with open(os.path.join(root, "setup.cfg"), "a") as f:
                f.write(SAMPLE_CONFIG)
        print(CONFIG_ERROR, file=sys.stderr)
        return 1

    print(" creating %s" % cfg.versionfile_source)
    with open(cfg.versionfile_source, "w") as f:
        LONG = LONG_VERSION_PY[cfg.VCS]
        f.write(LONG % {"DOLLAR": "$",
                        "STYLE": cfg.style,
                        "TAG_PREFIX": cfg.tag_prefix,
                        "PARENTDIR_PREFIX": cfg.parentdir_prefix,
                        "VERSIONFILE_SOURCE": cfg.versionfile_source,
                        })

    ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
                       "__init__.py")
    if os.path.exists(ipy):
        try:
            with open(ipy, "r") as f:
                old = f.read()
        except EnvironmentError:
            # unreadable: treat as empty so the snippet still gets appended
            old = ""
        if INIT_PY_SNIPPET not in old:
            print(" appending to %s" % ipy)
            with open(ipy, "a") as f:
                f.write(INIT_PY_SNIPPET)
        else:
            print(" %s unmodified" % ipy)
    else:
        print(" %s doesn't exist, ok" % ipy)
        ipy = None

    # Make sure both the top-level "versioneer.py" and versionfile_source
    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
    # they'll be copied into source distributions. Pip won't be able to
    # install the package without this.
    manifest_in = os.path.join(root, "MANIFEST.in")
    simple_includes = set()
    # True when the existing file's last line lacks a trailing newline; an
    # appended "include ..." would otherwise be glued onto that line.
    needs_newline = False
    try:
        with open(manifest_in, "r") as f:
            for line in f:
                needs_newline = not line.endswith("\n")
                if line.startswith("include "):
                    simple_includes.update(line.split()[1:])
    except EnvironmentError:
        pass
    # That doesn't cover everything MANIFEST.in can do
    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
    # it might give some false negatives. Appending redundant 'include'
    # lines is safe, though.
    missing = []
    if "versioneer.py" not in simple_includes:
        print(" appending 'versioneer.py' to MANIFEST.in")
        missing.append("versioneer.py")
    else:
        print(" 'versioneer.py' already in MANIFEST.in")
    if cfg.versionfile_source not in simple_includes:
        print(" appending versionfile_source ('%s') to MANIFEST.in" %
              cfg.versionfile_source)
        missing.append(cfg.versionfile_source)
    else:
        print(" versionfile_source already in MANIFEST.in")
    if missing:
        with open(manifest_in, "a") as f:
            if needs_newline:
                f.write("\n")
            for entry in missing:
                f.write("include %s\n" % entry)

    # Make VCS-specific changes. For git, this means creating/changing
    # .gitattributes to mark _version.py for export-subst keyword
    # substitution.
    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
    return 0
1777 |
1778 |
def scan_setup_py():
    """Validate the contents of setup.py against Versioneer's expectations.

    Reads ``setup.py`` from the current directory and does plain substring
    matching on each line. Two independent problems are reported on stdout:
    the file lacks one of the three expected Versioneer hooks, or it still
    contains old-style ``versioneer.<setting>`` assignments.

    Returns:
        The number of problems found (0, 1 or 2).
    """
    required_markers = (
        ("import versioneer", "import"),
        ("versioneer.get_cmdclass()", "cmdclass"),
        ("versioneer.get_version()", "get_version"),
    )
    obsolete_markers = ("versioneer.VCS", "versioneer.versionfile_source")

    seen = set()
    has_setters = False
    with open("setup.py", "r") as handle:
        for text in handle:
            seen.update(tag for needle, tag in required_markers
                        if needle in text)
            if any(needle in text for needle in obsolete_markers):
                has_setters = True

    problems = 0
    if len(seen) != 3:
        for message in (
                "",
                "Your setup.py appears to be missing some important items",
                "(but I might be wrong). Please make sure it has something",
                "roughly like the following:",
                "",
                " import versioneer",
                " setup( version=versioneer.get_version(),",
                " cmdclass=versioneer.get_cmdclass(), ...)",
                "",
        ):
            print(message)
        problems += 1
    if has_setters:
        for message in (
                "You should remove lines like 'versioneer.VCS = ' and",
                "'versioneer.versionfile_source = ' . This configuration",
                "now lives in setup.cfg, and should be removed from setup.py",
                "",
        ):
            print(message)
        problems += 1
    return problems
1814 |
1815 |
if __name__ == "__main__":
    # Command-line entry point. "python versioneer.py setup" installs
    # Versioneer into the project and then sanity-checks setup.py; the
    # process exits with status 1 if either step reports a problem.
    if len(sys.argv) < 2:
        # Without this guard a bare "python versioneer.py" died with an
        # IndexError traceback instead of a usable message.
        print("usage: python versioneer.py setup", file=sys.stderr)
        sys.exit(1)
    cmd = sys.argv[1]
    if cmd == "setup":
        errors = do_setup()
        errors += scan_setup_py()
        if errors:
            sys.exit(1)
1823 |
--------------------------------------------------------------------------------