├── .coveragerc
├── .gitignore
├── .travis.yml
├── .whitesource
├── AUTHORS
├── INSTALL
├── LICENSE
├── MANIFEST.in
├── Pipfile
├── README.rst
├── codecov.yml
├── docker
    ├── MongoDB
    │   └── docker-compose.yml
    └── Redis
    │   └── docker-compose.yml
├── docs
    ├── Makefile
    ├── _ext
    │   └── scrapydocs.py
    ├── _static
    │   └── selectors-sample1.html
    ├── _templates
    │   └── layout.html
    ├── conf.py
    ├── conf.py.bak
    ├── index.rst
    ├── intro
    │   ├── examples.rst
    │   ├── installation.rst
    │   ├── overview.rst
    │   └── tutorial.rst
    ├── make.bat
    ├── requirements.txt
    └── topics
    │   ├── cookiesmiddleware.rst
    │   ├── settings.rst
    │   └── storage.rst
├── pytest.ini
├── renovate.json
├── requirements.txt
├── scrapy_cookies
    ├── VERSION
    ├── __init__.py
    ├── downloadermiddlewares
    │   ├── __init__.py
    │   └── cookies.py
    ├── settings
    │   ├── __init__.py
    │   └── default_settings.py
    ├── signals.py
    └── storage
    │   ├── __init__.py
    │   ├── in_memory.py
    │   ├── mongo.py
    │   ├── redis_.py
    │   └── sqlite.py
├── setup.cfg
├── setup.py
├── tests
    ├── __init__.py
    ├── requirements.txt
    ├── test_downloadermiddleware_cookies.py
    └── test_storages
    │   ├── __init__.py
    │   ├── confest.py
    │   ├── docker-compose.yml
    │   ├── test_storage_in_memory.py
    │   ├── test_storage_mongo.py
    │   ├── test_storage_redis.py
    │   └── test_storage_sqlite.py
└── tox.ini


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = true
3 | include = scrapy_cookies/*
4 | omit =
5 |   tests/*
6 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # .gitignore contents are coming from the project gitignore:
  2 | # https://github.com/github/gitignore
  3 | 
  4 | 
  5 | # -----------------------------------------------------------------------------
  6 | # Python
  7 | # -----------------------------------------------------------------------------
  8 | 
  9 | # Byte-compiled / optimized / DLL files
 10 | __pycache__/
 11 | *.py[cod]
 12 | *$py.class
 13 | 
 14 | # C extensions
 15 | *.so
 16 | 
 17 | # Distribution / packaging
 18 | .Python
 19 | build/
 20 | develop-eggs/
 21 | dist/
 22 | downloads/
 23 | eggs/
 24 | .eggs/
 25 | lib/
 26 | lib64/
 27 | parts/
 28 | sdist/
 29 | var/
 30 | wheels/
 31 | *.egg-info/
 32 | .installed.cfg
 33 | *.egg
 34 | MANIFEST
 35 | 
 36 | # PyInstaller
 37 | #  Usually these files are written by a python script from a template
 38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 39 | *.manifest
 40 | *.spec
 41 | 
 42 | # Installer logs
 43 | pip-log.txt
 44 | pip-delete-this-directory.txt
 45 | 
 46 | # Unit test / coverage reports
 47 | htmlcov/
 48 | .tox/
 49 | .coverage
 50 | .coverage.*
 51 | .cache
 52 | nosetests.xml
 53 | coverage.xml
 54 | *.cover
 55 | .hypothesis/
 56 | .pytest_cache/
 57 | 
 58 | # Translations
 59 | *.mo
 60 | *.pot
 61 | 
 62 | # Django stuff:
 63 | *.log
 64 | local_settings.py
 65 | db.sqlite3
 66 | 
 67 | # Flask stuff:
 68 | instance/
 69 | .webassets-cache
 70 | 
 71 | # Scrapy stuff:
 72 | .scrapy
 73 | 
 74 | # Sphinx documentation
 75 | docs/_build/
 76 | 
 77 | # PyBuilder
 78 | target/
 79 | 
 80 | # Jupyter Notebook
 81 | .ipynb_checkpoints
 82 | 
 83 | # pyenv
 84 | .python-version
 85 | 
 86 | # celery beat schedule file
 87 | celerybeat-schedule
 88 | 
 89 | # SageMath parsed files
 90 | *.sage.py
 91 | 
 92 | # Environments
 93 | .env
 94 | .venv
 95 | env/
 96 | venv/
 97 | ENV/
 98 | env.bak/
 99 | venv.bak/
100 | 
101 | # Spyder project settings
102 | .spyderproject
103 | .spyproject
104 | 
105 | # Rope project settings
106 | .ropeproject
107 | 
108 | # mkdocs documentation
109 | /site
110 | 
111 | # mypy
112 | .mypy_cache/
113 | 
114 | 
115 | # -----------------------------------------------------------------------------
116 | # JetBrains
117 | # -----------------------------------------------------------------------------
118 | 
119 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion,
120 | # Android Studio and WebStorm
121 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
122 | 
123 | # User-specific stuff
124 | .idea/**/workspace.xml
125 | .idea/**/tasks.xml
126 | .idea/**/usage.statistics.xml
127 | .idea/**/dictionaries
128 | .idea/**/shelf
129 | 
130 | # Sensitive or high-churn files
131 | .idea/**/dataSources/
132 | .idea/**/dataSources.ids
133 | .idea/**/dataSources.local.xml
134 | .idea/**/sqlDataSources.xml
135 | .idea/**/dynamic.xml
136 | .idea/**/uiDesigner.xml
137 | .idea/**/dbnavigator.xml
138 | 
139 | # Gradle
140 | .idea/**/gradle.xml
141 | .idea/**/libraries
142 | 
143 | # Gradle and Maven with auto-import
144 | # When using Gradle or Maven with auto-import, you should exclude module files,
145 | # since they will be recreated, and may cause churn.  Uncomment if using
146 | # auto-import.
147 | # .idea/modules.xml
148 | # .idea/*.iml
149 | # .idea/modules
150 | 
151 | # CMake
152 | cmake-build-*/
153 | 
154 | # Mongo Explorer plugin
155 | .idea/**/mongoSettings.xml
156 | 
157 | # File-based project format
158 | *.iws
159 | 
160 | # IntelliJ
161 | out/
162 | 
163 | # mpeltonen/sbt-idea plugin
164 | .idea_modules/
165 | 
166 | # JIRA plugin
167 | atlassian-ide-plugin.xml
168 | 
169 | # Cursive Clojure plugin
170 | .idea/replstate.xml
171 | 
172 | # Crashlytics plugin (for Android Studio and IntelliJ)
173 | com_crashlytics_export_strings.xml
174 | crashlytics.properties
175 | crashlytics-build.properties
176 | fabric.properties
177 | 
178 | # Editor-based Rest Client
179 | .idea/httpRequests
180 | 
181 | 
182 | # -----------------------------------------------------------------------------
183 | # Linux
184 | # -----------------------------------------------------------------------------
185 | 
186 | *~
187 | 
188 | # temporary files which can be created if a process still has a handle open of
189 | # a deleted file
190 | .fuse_hidden*
191 | 
192 | # KDE directory preferences
193 | .directory
194 | 
195 | # Linux trash folder which might appear on any partition or disk
196 | .Trash-*
197 | 
198 | # .nfs files are created when an open file is removed but is still being
199 | # accessed
200 | .nfs*
201 | 
202 | 
203 | # -----------------------------------------------------------------------------
204 | # Vim
205 | # -----------------------------------------------------------------------------
206 | 
207 | # Swap
208 | [._]*.s[a-v][a-z]
209 | [._]*.sw[a-p]
210 | [._]s[a-rt-v][a-z]
211 | [._]ss[a-gi-z]
212 | [._]sw[a-p]
213 | 
214 | # Session
215 | Session.vim
216 | 
217 | # Temporary
218 | .netrwhist
219 | *~
220 | # Auto-generated tag files
221 | tags
222 | # Persistent undo
223 | [._]*.un~
224 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | services:
 3 |     - mongodb
 4 |     - redis-server
 5 | sudo: false
 6 | branches:
 7 |   only:
 8 |     - master
 9 |     - /^\d\.\d+$/
10 |     - /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
11 | matrix:
12 |   include:
13 |     - python: 3.6
14 |       env: TOXENV=py36
15 |     - python: 3.7
16 |       env: TOXENV=py37
17 |       dist: xenial
18 |     - python: 3.6
19 |       env: TOXENV=docs
20 | install:
21 |   - pip install -U tox twine wheel codecov
22 | script: tox
23 | after_success:
24 |   - codecov
25 | notifications:
26 |   slack:
27 |     rooms:
28 |       - secure: zsDJgHzhPoAIs8OsOiv5wmNsck++hjZljeYAfKh25UwW8X97Rqvq5r9LMlQzIHf2a638AbsubDNeSbrxxu6cFDeeIFngG1EO5mOSWzKr18LM5pFb0GVlImKzZpKgLqKmaD5ATYXnvUaHjEHgO45TzjIsbwo9P4vRU5C/lGYwfdv/J82hP0OUo02HqWGkwpG0aeuzs1bJZKjS/RdHROt0SQpRfVB8hi4HHrQILgliuVcpvIk46FgRB49VmzpAGuQfJtB06gj8o6tL/1JlXQ9/ElrHwEJyGjiyeP/nP8qit+i9TTlHGT9k0s9oYuXWM8OlgKfKE13Mo8fVRaAhVv9DRcwtNpX5M0RtC5bEjCPQIL14ky4ymeSlGchmy37jTKJCNHm4St4CtodCrF5J77h8Gkjx9tkZOhf4Rd8veMMgv/gj8pyt3asJ8PMDvREjF4n4mRPy5SB53anEhrFXE801KOpqb4Ffsjv2DBJmuAId+OmHLs69jHeiwxkBDaeKDr6rpiiKQaZNbDw5KxjEafEtclVmSdprq57Og2SPaCR1TrUu3SVyUjVoWNj6olKS9ALoiDAVLBprbyBsSS9gYwfTlSBNxCsMApQksjmo0/S6n/FwyCvn4AZZVziLVtVxPBY0sUpRBNySkTTrQzpCiEPNmv7tU8d0ZcVI508/WALzIQ0=
29 | cache:
30 |   directories:
31 |     - $HOME/.cache/pip
32 | #deploy:
33 | #  provider: pypi
34 | #  distributions: sdist bdist_wheel
35 | #  user: grammy.jiang
36 | #  password:
37 | #    secure: nUWjH3+9D9I+Xrsz7isjVKpzXwxlJuWFi2OrWCMSilxUNaMrV/4fA0TShmS40TCxTGSasmApjZgZz+Qu93Z9KlHnP2nmBsEXnqtrrCMIhI52wLFdnMcTCNCutzOiKzVSMK/SvEvRP6+fcWRbsE0n0hVwUZc/Lwz4083OXoMQIuMs3NbVD0rAPcHTBthTwabQjSp8WwYv9wZj/pZQ7qYw+QOe+b8XhQIIA10Oy9rAcyaOGASMsbBithKap91ayj2yRqmM6kb+nwi4aEJo/+XwQuncJWleTOy88Rt+YtoYkDxoHopjwBR2RAoevfq0Y1Mjl9e1mssunzQ053qmXfKAFB77Xn99iR0bmwSwwCtyBgoY/Ed5+wywwNdE6tfNB8/pYXg3z6mTmIwqXCQhn2+ORdD5RFn9RZAV4IoR1z8WRKU6clsVF2Msc9QUsj5wUA7LXkBg4HlVJurZurbFh58ViVTO2aNo6c+7fBiBwbm2aupeB+RlL9kCz14pbJcd89H6ViWByE6O9pFlyNcEt28FaKLIuyWAsAsYOPnj74oYuoV2hZ7y0259ncGX0UsDVzPwaJ/NlQsi4yh2d300mRvOSbiELhBZdABbkN+pgGmE1mlqUkY9GHb070JsOavzedzsuEgBLAaWgTAxeDd9LqFfIE7iFLj+U9v9d73ZtKy4VeE=
38 | #  on:
39 | #    branch: master
40 | ##    tags: true
41 | #    repo: grammy-jiang/scrapy-cookies
42 | #    condition: "$TOXENV == py36"
43 | 


--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
1 | {
2 |   "generalSettings": {
3 |     "shouldScanRepo": true
4 |   },
5 |   "checkRunSettings": {
6 |     "vulnerableCheckRunConclusionLevel": "failure"
7 |   }
8 | }
9 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Scrapy-Cookies is a part of Scrapy Enhancement, which intends to explore more
2 | possibility of Scrapy. Once the code is proved useful and stable, it will be
3 | merged back to Scrapy or contributed back to the Scrapy Plugins.
4 | 
5 | Here is the list of the primary authors & contributors:
6 | 
7 |  * Grammy Jiang
8 | 


--------------------------------------------------------------------------------
/INSTALL:
--------------------------------------------------------------------------------
1 | For information about installing Scrapy-Cookies see:
2 | 
3 | * docs/intro/installation.rst (local file)
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Scrapy Enhancement developers.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification,
 5 | are permitted provided that the following conditions are met:
 6 | 
 7 |     1. Redistributions of source code must retain the above copyright notice,
 8 |        this list of conditions, and the following disclaimer.
 9 | 
10 |     2. Redistributions in binary form must reproduce the above copyright
11 |        notice, this list of conditions, and the following disclaimer in the
12 |        documentation and/or other materials provided with the distribution.
13 | 
14 |     3. Neither the name of Scrapy nor the names of its contributors may be used
15 |        to endorse or promote products derived from this software without
16 |        specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include README.rst
 2 | include AUTHORS
 3 | include INSTALL
 4 | include LICENSE
 5 | include MANIFEST.in
 6 | include scrapy_cookies/VERSION
 7 | recursive-include docs *
 8 | prune docs/build
 9 | recursive-include tests *
10 | global-exclude __pycache__ *.py[cod]
11 | 


--------------------------------------------------------------------------------
/Pipfile:
--------------------------------------------------------------------------------
 1 | [[source]]
 2 | name = "pypi"
 3 | url = "https://pypi.org/simple"
 4 | verify_ssl = true
 5 | 
 6 | [dev-packages]
 7 | bandit = "*"
 8 | black = "*"
 9 | flake8 = "*"
10 | flake8-bugbear = "*"
11 | ipython = "*"
12 | isort = "*"
13 | mitmproxy = "*"
14 | mypy = "*"
15 | pre-commit = "*"
16 | prospector = "*"
17 | pylint = "*"
18 | pytest = "*"
19 | pytest-benchmark = "*"
20 | pytest-black = "*"
21 | pytest-cov = "*"
22 | pytest-docker-compose = "*"
23 | pytest-env = "*"
24 | pytest-instafail = "*"
25 | pytest-mypy = "*"
26 | pytest-pycharm = "*"
27 | pytest-pylint = "*"
28 | pytest-sugar = "*"
29 | pytest-twisted = "*"
30 | pytest-watch = "*"
31 | pytest-xdist = "*"
32 | radon = "*"
33 | tox = "*"
34 | testfixtures = "*"
35 | 
36 | [packages]
37 | hiredis = "*"
38 | pymongo = "*"
39 | redis = "*"
40 | six = "*"
41 | ujson = "*"
42 | Scrapy = "*"
43 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | ==============
  2 | Scrapy Cookies
  3 | ==============
  4 | 
  5 | .. image:: https://img.shields.io/pypi/v/scrapy-cookies.svg
  6 |    :target: https://pypi.python.org/pypi/scrapy-cookies
  7 |    :alt: PyPI
  8 | 
  9 | .. image:: https://img.shields.io/pypi/pyversions/scrapy-cookies.svg
 10 |    :target: https://pypi.python.org/pypi/scrapy-cookies
 11 |    :alt: PyPI - Python Version
 12 | 
 13 | .. image:: https://img.shields.io/travis/scrapedia/scrapy-cookies/master.svg
 14 |    :target: http://travis-ci.org/scrapedia/scrapy-cookies
 15 |    :alt: Travis branch
 16 | 
 17 | .. image:: https://img.shields.io/pypi/wheel/scrapy-cookies.svg
 18 |    :target: https://pypi.python.org/pypi/scrapy-cookies
 19 |    :alt: PyPI - Wheel
 20 | 
 21 | .. image:: https://img.shields.io/codecov/c/github/scrapedia/scrapy-cookies/master.svg
 22 |    :target: http://codecov.io/github/scrapedia/scrapy-cookies?branch=master
 23 |    :alt: Codecov branch
 24 | 
 25 | Overview
 26 | ========
 27 | 
 28 | This middleware enable Scrapy manage, save and restore cookies in various ways.
 29 | With this middleware Scrapy can easily re-use cookies which saved before or
 30 | in multiple spiders, and share cookies between spiders, even in spider-cluster.
 31 | 
 32 | Requirements
 33 | ============
 34 | 
 35 | * Python 2.7 or Python 3.4+
 36 | * Works on Linux, Windows, Mac OSX, BSD
 37 | 
 38 | Installation
 39 | ============
 40 | 
 41 | The quick way:
 42 | 
 43 |    pip install scrapy-cookies
 44 | 
 45 | For more details see the installation section in the documentation:
 46 | https://scrapy-cookies.readthedocs.io/en/latest/intro/installation.html
 47 | 
 48 | Documentation
 49 | =============
 50 | 
 51 | Documentation is available online at
 52 | https://scrapy-cookies.readthedocs.io/en/latest/ and in the ``docs`` directory.
 53 | 
 54 | Releases
 55 | ========
 56 | 
 57 | You can find release notes at
 58 | https://scrapy-cookies.readthedocs.io/en/latest/news.html
 59 | 
 60 | Community (blog, twitter, mail list, IRC)
 61 | =========================================
 62 | 
 63 | *Keeping this section same as Scrapy is intending to benefit back to Scrapy.*
 64 | 
 65 | See https://scrapy.org/community/
 66 | 
 67 | Contributing
 68 | ============
 69 | 
 70 | *Keeping this section same as Scrapy is intending to be easier when this repo
 71 | merge back to Scrapy.*
 72 | 
 73 | See https://doc.scrapy.org/en/master/contributing.html
 74 | 
 75 | Code of Conduct
 76 | ---------------
 77 | 
 78 | Please note that this project is released with a Contributor Code of Conduct
 79 | (see https://github.com/scrapy/scrapy/blob/master/CODE_OF_CONDUCT.md).
 80 | 
 81 | By participating in this project you agree to abide by its terms.
 82 | Please report unacceptable behavior to opensource@scrapinghub.com.
 83 | 
 84 | 
 85 | Companies using Scrapy
 86 | ======================
 87 | 
 88 | *Keeping this section same as Scrapy is intending to benefit back to Scrapy.*
 89 | 
 90 | See https://scrapy.org/companies/
 91 | 
 92 | Commercial Support
 93 | ==================
 94 | 
 95 | *Keeping this section same as Scrapy is intending to benefit back to Scrapy.*
 96 | 
 97 | See https://scrapy.org/support/
 98 | 
 99 | TODO
100 | ====
101 | 
102 | * [X] Remove the support lower than python 3.6
103 | * [ ] Use JSON1 extension in sqlite backend
104 | * [ ] Update backend arguments calling way
105 | * [ ] Replace pymongo with txmongo in MongoDB backend
106 | * [ ] Replace redis sync driver with async driver in Redis backend
107 | * [ ] Change LICENSE to GPLv3
108 | * [ ] Use versioneer for version management
109 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment:
 2 |   layout: "header, diff, tree"
 3 | 
 4 | coverage:
 5 |   status:
 6 |     project: false
 7 |   notify:
 8 |     slack:
 9 |       default:
10 |         url: "secret:KQc0qNe30SGOA3baphzz48aXGWPJlE6qDlk4qZGGdW8fAEEJG8lHubU9301vJCECqEhv5E+JNHXfWKd+bcKjhIc5nhgt2w2BaZyEXEawhaTx0MJZ8xjX/unaul2wA5rL3ZkV4loVbN34sOq7vFgEzSS"
11 |         branches: null
12 |         flags: null
13 |         only_pulls: false
14 |         paths: null
15 |         threshold: 1%
16 | 


--------------------------------------------------------------------------------
/docker/MongoDB/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | services:
 3 |   # https://hub.docker.com/_/mongo/
 4 |   mongo:
 5 |     container_name: dc-mongo
 6 |     image: mongo:latest
 7 |     networks:
 8 |       - mongo
 9 |     ports:
10 |       - "127.0.0.1:27017:27017"
11 |     restart: always
12 |     tty: true
13 |   # https://hub.docker.com/_/mongo-express/
14 |   mongo-express:
15 |     container_name: dc-mongodb-express
16 |     depends_on:
17 |       - mongo
18 |     environment:
19 |       ME_CONFIG_MONGODB_PORT: 27017
20 |       ME_CONFIG_MONGODB_SERVER: mongo
21 |     image: mongo-express:latest
22 |     links:
23 |       - mongo
24 |     networks:
25 |       - mongo
26 |     ports:
27 |       - "127.0.0.1:8081:8081"
28 |     restart: always
29 |     tty: true
30 | 
31 | networks:
32 |   mongo:
33 |     driver: bridge
34 | 


--------------------------------------------------------------------------------
/docker/Redis/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | services:
 3 |     redis:
 4 |         container_name: dc-redis
 5 |         image: redis:latest
 6 |         networks:
 7 |             - redis
 8 |         ports:
 9 |             - "127.0.0.1:6379:6379"
10 |         restart: always
11 |         tty: true
12 |     redis-commander:
13 |         command: --redis-host redis
14 |         container_name: dc-redis-commander
15 |         depends_on:
16 |             - redis
17 |         image: tenstartups/redis-commander:latest
18 |         links:
19 |             - redis
20 |         networks:
21 |             - redis
22 |         ports:
23 |             - "127.0.0.1:8181:8081"
24 |         restart: always
25 |         tty: true
26 | 
27 | networks:
28 |     redis:
29 |         driver: bridge
30 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = Scrapy-Cookies
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 | 


--------------------------------------------------------------------------------
/docs/_ext/scrapydocs.py:
--------------------------------------------------------------------------------
  1 | from docutils.parsers.rst.roles import set_classes
  2 | from docutils import nodes
  3 | from docutils.parsers.rst import Directive
  4 | from sphinx.util.nodes import make_refnode
  5 | from operator import itemgetter
  6 | 
  7 | 
  8 | class settingslist_node(nodes.General, nodes.Element):
  9 |     pass
 10 | 
 11 | 
 12 | class SettingsListDirective(Directive):
 13 |     def run(self):
 14 |         return [settingslist_node('')]
 15 | 
 16 | 
 17 | def is_setting_index(node):
 18 |     if node.tagname == 'index':
 19 |         # index entries for setting directives look like:
 20 |         # [(u'pair', u'SETTING_NAME; setting', u'std:setting-SETTING_NAME', '')]
 21 |         entry_type, info, refid = node['entries'][0][:3]
 22 |         return entry_type == 'pair' and info.endswith('; setting')
 23 |     return False
 24 | 
 25 | 
 26 | def get_setting_target(node):
 27 |     # target nodes are placed next to the node in the doc tree
 28 |     return node.parent[node.parent.index(node) + 1]
 29 | 
 30 | 
 31 | def get_setting_name_and_refid(node):
 32 |     """Extract setting name from directive index node"""
 33 |     entry_type, info, refid = node['entries'][0][:3]
 34 |     return info.replace('; setting', ''), refid
 35 | 
 36 | 
 37 | def collect_scrapy_settings_refs(app, doctree):
 38 |     env = app.builder.env
 39 | 
 40 |     if not hasattr(env, 'scrapy_all_settings'):
 41 |         env.scrapy_all_settings = []
 42 | 
 43 |     for node in doctree.traverse(is_setting_index):
 44 |         targetnode = get_setting_target(node)
 45 |         assert isinstance(targetnode, nodes.target), "Next node is not a target"
 46 | 
 47 |         setting_name, refid = get_setting_name_and_refid(node)
 48 | 
 49 |         env.scrapy_all_settings.append({
 50 |             'docname': env.docname,
 51 |             'setting_name': setting_name,
 52 |             'refid': refid,
 53 |         })
 54 | 
 55 | 
 56 | def make_setting_element(setting_data, app, fromdocname):
 57 |     refnode = make_refnode(app.builder, fromdocname,
 58 |                            todocname=setting_data['docname'],
 59 |                            targetid=setting_data['refid'],
 60 |                            child=nodes.Text(setting_data['setting_name']))
 61 |     p = nodes.paragraph()
 62 |     p += refnode
 63 | 
 64 |     item = nodes.list_item()
 65 |     item += p
 66 |     return item
 67 | 
 68 | 
 69 | def replace_settingslist_nodes(app, doctree, fromdocname):
 70 |     env = app.builder.env
 71 | 
 72 |     for node in doctree.traverse(settingslist_node):
 73 |         settings_list = nodes.bullet_list()
 74 |         settings_list.extend([make_setting_element(d, app, fromdocname)
 75 |                               for d in sorted(env.scrapy_all_settings,
 76 |                                               key=itemgetter('setting_name'))
 77 |                               if fromdocname != d['docname']])
 78 |         node.replace_self(settings_list)
 79 | 
 80 | 
 81 | def setup(app):
 82 |     app.add_crossref_type(
 83 |         directivename = "setting",
 84 |         rolename      = "setting",
 85 |         indextemplate = "pair: %s; setting",
 86 |     )
 87 |     app.add_crossref_type(
 88 |         directivename = "signal",
 89 |         rolename      = "signal",
 90 |         indextemplate = "pair: %s; signal",
 91 |     )
 92 |     app.add_crossref_type(
 93 |         directivename = "command",
 94 |         rolename      = "command",
 95 |         indextemplate = "pair: %s; command",
 96 |     )
 97 |     app.add_crossref_type(
 98 |         directivename = "reqmeta",
 99 |         rolename      = "reqmeta",
100 |         indextemplate = "pair: %s; reqmeta",
101 |     )
102 |     app.add_role('source', source_role)
103 |     app.add_role('commit', commit_role)
104 |     app.add_role('issue', issue_role)
105 |     app.add_role('rev', rev_role)
106 | 
107 |     app.add_node(settingslist_node)
108 |     app.add_directive('settingslist', SettingsListDirective)
109 | 
110 |     app.connect('doctree-read', collect_scrapy_settings_refs)
111 |     app.connect('doctree-resolved', replace_settingslist_nodes)
112 | 
113 | 
114 | def source_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
115 |     ref = 'https://github.com/scrapy/scrapy/blob/master/' + text
116 |     set_classes(options)
117 |     node = nodes.reference(rawtext, text, refuri=ref, **options)
118 |     return [node], []
119 | 
120 | 
121 | def issue_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
122 |     ref = 'https://github.com/scrapy/scrapy/issues/' + text
123 |     set_classes(options)
124 |     node = nodes.reference(rawtext, 'issue ' + text, refuri=ref, **options)
125 |     return [node], []
126 | 
127 | 
128 | def commit_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
129 |     ref = 'https://github.com/scrapy/scrapy/commit/' + text
130 |     set_classes(options)
131 |     node = nodes.reference(rawtext, 'commit ' + text, refuri=ref, **options)
132 |     return [node], []
133 | 
134 | 
135 | def rev_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
136 |     ref = 'http://hg.scrapy.org/scrapy/changeset/' + text
137 |     set_classes(options)
138 |     node = nodes.reference(rawtext, 'r' + text, refuri=ref, **options)
139 |     return [node], []
140 | 


--------------------------------------------------------------------------------
/docs/_static/selectors-sample1.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |  <head>
 3 |   <base href='http://example.com/' />
 4 |   <title>Example website</title>
 5 |  </head>
 6 |  <body>
 7 |   <div id='images'>
 8 |    <a href='image1.html'>Name: My image 1 <br /><img src='image1_thumb.jpg' /></a>
 9 |    <a href='image2.html'>Name: My image 2 <br /><img src='image2_thumb.jpg' /></a>
10 |    <a href='image3.html'>Name: My image 3 <br /><img src='image3_thumb.jpg' /></a>
11 |    <a href='image4.html'>Name: My image 4 <br /><img src='image4_thumb.jpg' /></a>
12 |    <a href='image5.html'>Name: My image 5 <br /><img src='image5_thumb.jpg' /></a>
13 |   </div>
14 |  </body>
15 | </html>
16 | 


--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
 1 | {% extends "!layout.html" %}
 2 | 
 3 | {% block footer %}
 4 | {{ super() }}
 5 | <script type="text/javascript">
 6 | !function(){var analytics=window.analytics=window.analytics||[];if(!analytics.initialize)if(analytics.invoked)window.console&&console.error&&console.error("Segment snippet included twice.");else{analytics.invoked=!0;analytics.methods=["trackSubmit","trackClick","trackLink","trackForm","pageview","identify","reset","group","track","ready","alias","page","once","off","on"];analytics.factory=function(t){return function(){var e=Array.prototype.slice.call(arguments);e.unshift(t);analytics.push(e);return analytics}};for(var t=0;t<analytics.methods.length;t++){var e=analytics.methods[t];analytics[e]=analytics.factory(e)}analytics.load=function(t){var e=document.createElement("script");e.type="text/javascript";e.async=!0;e.src=("https:"===document.location.protocol?"https://":"http://")+"cdn.segment.com/analytics.js/v1/"+t+"/analytics.min.js";var n=document.getElementsByTagName("script")[0];n.parentNode.insertBefore(e,n)};analytics.SNIPPET_VERSION="3.1.0";
 7 | analytics.load("8UDQfnf3cyFSTsM4YANnW5sXmgZVILbA");
 8 | analytics.page();
 9 | }}();
10 | 
11 | analytics.ready(function () {
12 |     ga('require', 'linker');
13 |     ga('linker:autoLink', ['scrapinghub.com', 'crawlera.com']);
14 | });
15 | </script>
16 | {% endblock %}
17 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Scrapy-Cookies documentation build configuration file, created by
  4 | # sphinx-quickstart on Mon Nov 24 12:02:52 2008.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # The contents of this file are pickled, so don't put values in the namespace
  9 | # that aren't pickleable (module imports are okay, they're removed
 10 | # automatically).
 11 | #
 12 | # All configuration values have a default; values that are commented out
 13 | # serve to show the default.
 14 | 
 15 | import sys
 16 | from os import path
 17 | 
 18 | # If your extensions are in another directory, add it here. If the directory
 19 | # is relative to the documentation root, use os.path.abspath to make it
 20 | # absolute, like shown here.
 21 | sys.path.append(path.join(path.dirname(__file__), "_ext"))
 22 | sys.path.insert(0, path.dirname(path.dirname(__file__)))
 23 | 
 24 | 
 25 | # General configuration
 26 | # ---------------------
 27 | 
 28 | # Add any Sphinx extension module names here, as strings. They can be extensions
 29 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 30 | extensions = [
 31 |     'scrapydocs',
 32 |     'sphinx.ext.autodoc'
 33 | ]
 34 | 
 35 | # Add any paths that contain templates here, relative to this directory.
 36 | templates_path = ['_templates']
 37 | 
 38 | # The suffix of source filenames.
 39 | source_suffix = '.rst'
 40 | 
 41 | # The encoding of source files.
 42 | #source_encoding = 'utf-8'
 43 | 
 44 | # The master toctree document.
 45 | master_doc = 'index'
 46 | 
 47 | # General information about the project.
 48 | project = 'Scrapy-Cookies'
 49 | copyright = '2018, Scrapy Enhancement developers'
 50 | 
 51 | # The version info for the project you're documenting, acts as replacement for
 52 | # |version| and |release|, also used in various other places throughout the
 53 | # built documents.
 54 | #
 55 | # The short X.Y version.
 56 | try:
 57 |     import scrapy_cookies
 58 |     version = '.'.join(map(str, scrapy_cookies.version_info[:2]))
 59 |     release = scrapy_cookies.__version__
 60 | except ImportError:
 61 |     version = ''
 62 |     release = ''
 63 | 
 64 | # The language for content autogenerated by Sphinx. Refer to documentation
 65 | # for a list of supported languages.
 66 | language = 'en'
 67 | 
 68 | # There are two options for replacing |today|: either, you set today to some
 69 | # non-false value, then it is used:
 70 | #today = ''
 71 | # Else, today_fmt is used as the format for a strftime call.
 72 | #today_fmt = '%B %d, %Y'
 73 | 
 74 | # List of documents that shouldn't be included in the build.
 75 | #unused_docs = []
 76 | 
 77 | # List of directories, relative to source directory, that shouldn't be searched
 78 | # for source files.
 79 | exclude_trees = ['.build']
 80 | 
 81 | # The reST default role (used for this markup: `text`) to use for all documents.
 82 | #default_role = None
 83 | 
 84 | # If true, '()' will be appended to :func: etc. cross-reference text.
 85 | #add_function_parentheses = True
 86 | 
 87 | # If true, the current module name will be prepended to all description
 88 | # unit titles (such as .. function::).
 89 | #add_module_names = True
 90 | 
 91 | # If true, sectionauthor and moduleauthor directives will be shown in the
 92 | # output. They are ignored by default.
 93 | #show_authors = False
 94 | 
 95 | # The name of the Pygments (syntax highlighting) style to use.
 96 | pygments_style = 'sphinx'
 97 | 
 98 | 
 99 | # Options for HTML output
100 | # -----------------------
101 | 
102 | # The theme to use for HTML and HTML Help pages.  See the documentation for
103 | # a list of builtin themes.
104 | html_theme = 'sphinx_rtd_theme'
105 | 
106 | # Theme options are theme-specific and customize the look and feel of a theme
107 | # further.  For a list of options available for each theme, see the
108 | # documentation.
109 | #html_theme_options = {}
110 | 
111 | # Add any paths that contain custom themes here, relative to this directory.
112 | # Add path to the RTD explicitly to robustify builds (otherwise might
113 | # fail in a clean Debian build env)
114 | import sphinx_rtd_theme
115 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
116 | 
117 | 
118 | # The style sheet to use for HTML and HTML Help pages. A file of that name
119 | # must exist either in Sphinx' static/ path, or in one of the custom paths
120 | # given in html_static_path.
121 | # html_style = 'scrapydoc.css'
122 | 
123 | # The name for this set of Sphinx documents.  If None, it defaults to
124 | # "<project> v<release> documentation".
125 | #html_title = None
126 | 
127 | # A shorter title for the navigation bar.  Default is the same as html_title.
128 | #html_short_title = None
129 | 
130 | # The name of an image file (relative to this directory) to place at the top
131 | # of the sidebar.
132 | #html_logo = None
133 | 
134 | # The name of an image file (within the static path) to use as favicon of the
135 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
136 | # pixels large.
137 | #html_favicon = None
138 | 
139 | # Add any paths that contain custom static files (such as style sheets) here,
140 | # relative to this directory. They are copied after the builtin static files,
141 | # so a file named "default.css" will overwrite the builtin "default.css".
142 | html_static_path = ['_static']
143 | 
144 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
145 | # using the given strftime format.
146 | html_last_updated_fmt = '%b %d, %Y'
147 | 
148 | # Custom sidebar templates, maps document names to template names.
149 | #html_sidebars = {}
150 | 
151 | # Additional templates that should be rendered to pages, maps page names to
152 | # template names.
153 | #html_additional_pages = {}
154 | 
155 | # If false, no module index is generated.
156 | #html_use_modindex = True
157 | 
158 | # If false, no index is generated.
159 | #html_use_index = True
160 | 
161 | # If true, the index is split into individual pages for each letter.
162 | #html_split_index = False
163 | 
164 | # If true, the reST sources are included in the HTML build as _sources/<name>.
165 | html_copy_source = True
166 | 
167 | # If true, an OpenSearch description file will be output, and all pages will
168 | # contain a <link> tag referring to it.  The value of this option must be the
169 | # base URL from which the finished HTML is served.
170 | #html_use_opensearch = ''
171 | 
172 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
173 | #html_file_suffix = ''
174 | 
175 | # Output file base name for HTML help builder.
176 | htmlhelp_basename = 'Scrapydoc'
177 | 
178 | 
179 | # Options for LaTeX output
180 | # ------------------------
181 | 
182 | # The paper size ('letter' or 'a4').
183 | #latex_paper_size = 'letter'
184 | 
185 | # The font size ('10pt', '11pt' or '12pt').
186 | #latex_font_size = '10pt'
187 | 
188 | # Grouping the document tree into LaTeX files. List of tuples
189 | # (source start file, target name, title, author, document class [howto/manual]).
190 | latex_documents = [
191 |   ('index', 'Scrapy.tex', u'Scrapy Documentation',
192 |    u'Scrapy developers', 'manual'),
193 | ]
194 | 
195 | # The name of an image file (relative to this directory) to place at the top of
196 | # the title page.
197 | #latex_logo = None
198 | 
199 | # For "manual" documents, if this is true, then toplevel headings are parts,
200 | # not chapters.
201 | #latex_use_parts = False
202 | 
203 | # Additional stuff for the LaTeX preamble.
204 | #latex_preamble = ''
205 | 
206 | # Documents to append as an appendix to all manuals.
207 | #latex_appendices = []
208 | 
209 | # If false, no module index is generated.
210 | #latex_use_modindex = True
211 | 
212 | 
213 | # Options for the linkcheck builder
214 | # ---------------------------------
215 | 
216 | # A list of regular expressions that match URIs that should not be checked when
217 | # doing a linkcheck build.
218 | linkcheck_ignore = [
219 |     'http://localhost:\d+', 'http://hg.scrapy.org',
220 |     'http://directory.google.com/'
221 | ]
222 | 


--------------------------------------------------------------------------------
/docs/conf.py.bak:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | import os
 16 | import sys
 17 | # sys.path.insert(0, os.path.abspath('.'))
 18 | 
 19 | sys.path.append(path.join(path.dirname(__file__), "_ext"))
 20 | sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
 21 | 
 22 | # -- Project information -----------------------------------------------------
 23 | 
 24 | project = 'Scrapy-Cookies'
 25 | copyright = '2018, Grammy Jiang'
 26 | author = 'Grammy Jiang'
 27 | 
 28 | try:
 29 |     import scrapy
 30 |     version = '.'.join(map(str, scrapy.version_info[:2]))
 31 |     release = scrapy.__version__
 32 | except ImportError:
 33 |     version = ''
 34 |     release = ''
 35 | 
 36 | # The short X.Y version
 37 | # version = ''
 38 | # The full version, including alpha/beta/rc tags
 39 | # release = '0.0.1'
 40 | 
 41 | 
 42 | # -- General configuration ---------------------------------------------------
 43 | 
 44 | # If your documentation needs a minimal Sphinx version, state it here.
 45 | #
 46 | # needs_sphinx = '1.0'
 47 | 
 48 | # Add any Sphinx extension module names here, as strings. They can be
 49 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 50 | # ones.
 51 | extensions = [
 52 |     'scrapydocs',
 53 |     'sphinx.ext.autodoc',
 54 |     'sphinx.ext.doctest',
 55 |     'sphinx.ext.intersphinx',
 56 |     'sphinx.ext.todo',
 57 |     'sphinx.ext.coverage',
 58 |     'sphinx.ext.mathjax',
 59 |     'sphinx.ext.ifconfig',
 60 |     'sphinx.ext.viewcode',
 61 |     'sphinx.ext.githubpages',
 62 | ]
 63 | 
 64 | # Add any paths that contain templates here, relative to this directory.
 65 | templates_path = ['_templates']
 66 | 
 67 | # The suffix(es) of source filenames.
 68 | # You can specify multiple suffix as a list of string:
 69 | #
 70 | # source_suffix = ['.rst', '.md']
 71 | source_suffix = '.rst'
 72 | 
 73 | # The master toctree document.
 74 | master_doc = 'index'
 75 | 
 76 | # The language for content autogenerated by Sphinx. Refer to documentation
 77 | # for a list of supported languages.
 78 | #
 79 | # This is also used if you do content translation via gettext catalogs.
 80 | # Usually you set "language" from the command line for these cases.
 81 | language = None
 82 | 
 83 | # List of patterns, relative to source directory, that match files and
 84 | # directories to ignore when looking for source files.
 85 | # This pattern also affects html_static_path and html_extra_path .
 86 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 87 | 
 88 | # The name of the Pygments (syntax highlighting) style to use.
 89 | pygments_style = 'sphinx'
 90 | 
 91 | 
 92 | # -- Options for HTML output -------------------------------------------------
 93 | 
 94 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 95 | # a list of builtin themes.
 96 | #
 97 | html_theme = 'alabaster'
 98 | 
 99 | # Theme options are theme-specific and customize the look and feel of a theme
100 | # further.  For a list of options available for each theme, see the
101 | # documentation.
102 | #
103 | # html_theme_options = {}
104 | 
105 | # Add any paths that contain custom static files (such as style sheets) here,
106 | # relative to this directory. They are copied after the builtin static files,
107 | # so a file named "default.css" will overwrite the builtin "default.css".
108 | html_static_path = ['_static']
109 | 
110 | # Custom sidebar templates, must be a dictionary that maps document names
111 | # to template names.
112 | #
113 | # The default sidebars (for documents that don't match any pattern) are
114 | # defined by theme itself.  Builtin themes are using these templates by
115 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
116 | # 'searchbox.html']``.
117 | #
118 | # html_sidebars = {}
119 | 
120 | 
121 | # -- Options for HTMLHelp output ---------------------------------------------
122 | 
123 | # Output file base name for HTML help builder.
124 | htmlhelp_basename = 'Scrapy-Cookiesdoc'
125 | 
126 | 
127 | # -- Options for LaTeX output ------------------------------------------------
128 | 
129 | latex_elements = {
130 |     # The paper size ('letterpaper' or 'a4paper').
131 |     #
132 |     # 'papersize': 'letterpaper',
133 | 
134 |     # The font size ('10pt', '11pt' or '12pt').
135 |     #
136 |     # 'pointsize': '10pt',
137 | 
138 |     # Additional stuff for the LaTeX preamble.
139 |     #
140 |     # 'preamble': '',
141 | 
142 |     # Latex figure (float) alignment
143 |     #
144 |     # 'figure_align': 'htbp',
145 | }
146 | 
147 | # Grouping the document tree into LaTeX files. List of tuples
148 | # (source start file, target name, title,
149 | #  author, documentclass [howto, manual, or own class]).
150 | latex_documents = [
151 |     (master_doc, 'Scrapy-Cookies.tex', 'Scrapy-Cookies Documentation',
152 |      'Grammy Jiang', 'manual'),
153 | ]
154 | 
155 | 
156 | # -- Options for manual page output ------------------------------------------
157 | 
158 | # One entry per manual page. List of tuples
159 | # (source start file, name, description, authors, manual section).
160 | man_pages = [
161 |     (master_doc, 'scrapy-cookies', 'Scrapy-Cookies Documentation',
162 |      [author], 1)
163 | ]
164 | 
165 | 
166 | # -- Options for Texinfo output ----------------------------------------------
167 | 
168 | # Grouping the document tree into Texinfo files. List of tuples
169 | # (source start file, target name, title, author,
170 | #  dir menu entry, description, category)
171 | texinfo_documents = [
172 |     (master_doc, 'Scrapy-Cookies', 'Scrapy-Cookies Documentation',
173 |      author, 'Scrapy-Cookies', 'One line description of project.',
174 |      'Miscellaneous'),
175 | ]
176 | 
177 | 
178 | # -- Options for Epub output -------------------------------------------------
179 | 
180 | # Bibliographic Dublin Core info.
181 | epub_title = project
182 | epub_author = author
183 | epub_publisher = author
184 | epub_copyright = copyright
185 | 
186 | # The unique identifier of the text. This can be a ISBN number
187 | # or the project homepage.
188 | #
189 | # epub_identifier = ''
190 | 
191 | # A unique identification for the text.
192 | #
193 | # epub_uid = ''
194 | 
195 | # A list of files that should not be packed into the epub file.
196 | epub_exclude_files = ['search.html']
197 | 
198 | 
199 | # -- Extension configuration -------------------------------------------------
200 | 
201 | # -- Options for intersphinx extension ---------------------------------------
202 | 
203 | # Example configuration for intersphinx: refer to the Python standard library.
204 | intersphinx_mapping = {'https://docs.python.org/': None}
205 | 
206 | # -- Options for todo extension ----------------------------------------------
207 | 
208 | # If true, `todo` and `todoList` produce output, else they produce nothing.
209 | todo_include_todos = True
210 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. _topics-index:
 2 | 
 3 | ======================================
 4 | Scrapy-Cookies |version| documentation
 5 | ======================================
 6 | 
 7 | This documentation contains everything you need to know about Scrapy-Cookies.
 8 | 
 9 | First steps
10 | ===========
11 | 
12 | .. toctree::
13 |    :caption: First steps
14 |    :hidden:
15 | 
16 |    intro/overview
17 |    intro/installation
18 |    intro/tutorial
19 |    intro/examples
20 | 
21 | :doc:`intro/overview`
22 |     Understand what Scrapy-Cookies is and how it can help you.
23 | 
24 | :doc:`intro/installation`
25 |     Get Scrapy-Cookies installed on your computer.
26 | 
27 | :doc:`intro/tutorial`
28 |     Write your first project with Scrapy-Cookies.
29 | 
30 | :doc:`intro/examples`
31 |     Learn more by playing with a pre-made project with Scrapy-Cookies.
32 | 
33 | .. _section-basics:
34 | 
35 | Basic concepts
36 | ==============
37 | 
38 | .. toctree::
39 |    :caption: Basic concepts
40 |    :hidden:
41 | 
42 |    topics/cookiesmiddleware
43 |    topics/storage
44 |    topics/settings
45 | 
46 | 
47 | :doc:`topics/cookiesmiddleware`
48 |     Extract cookies from response and Restore cookies to request.
49 | 
50 | :doc:`topics/storage`
51 |     Save ,restore and share the cookies.
52 | 
53 | :doc:`topics/settings`
54 |     Learn how to configure Scrapy-Cookies and see all available settings.
55 | 
56 | 
57 | .. _extending-scrapy:
58 | 
59 | Extending Scrapy-Cookies
60 | ========================
61 | 
62 | .. toctree::
63 |    :caption: Extending Scrapy-Cookies
64 |    :hidden:
65 | 
66 |    topics/storage
67 | 
68 | 
69 | :doc:`topics/storage`
70 |     Customize how the storage save, restore and share the cookies
71 | 


--------------------------------------------------------------------------------
/docs/intro/examples.rst:
--------------------------------------------------------------------------------
 1 | .. _intro-examples:
 2 | 
 3 | ========
 4 | Examples
 5 | ========
 6 | 
 7 | The best way to learn is with examples, and Scrapy-Cookies is no exception. For
 8 | this reason, there is an example project with Scrapy-Cookies named grouponbot_,
 9 | that you can use to play and learn more about Scrapy-Cookies. It contains one
10 | spiders for https://www.groupon.com.au, only crawl the first page and save the
11 | cookies.
12 | 
13 | The grouponbot_ project is available at:
14 | https://github.com/grammy-jiang/scrapy-enhancement-examples. You can find more
15 | information about it in the project's README.
16 | 
17 | If you're familiar with git, you can checkout the code. Otherwise you can
18 | download the project as a zip file by clicking
19 | `here <https://github.com/grammy-jiang/scrapy-enhancement-examples/archive/master.zip>`_.
20 | 
21 | .. _grouponbot: https://github.com/grammy-jiang/scrapy-enhancement-examples
22 | 


--------------------------------------------------------------------------------
/docs/intro/installation.rst:
--------------------------------------------------------------------------------
  1 | .. _intro-installation:
  2 | 
  3 | ==================
  4 | Installation guide
  5 | ==================
  6 | 
  7 | Installing Scrapy
  8 | =================
  9 | 
 10 | Scrapy-Cookies runs on Python 2.7 and Python 3.4 or above under CPython (default
 11 | Python implementation) and PyPy (starting with PyPy 5.9).
 12 | 
 13 | You can install Scrapy-Cookies and its dependencies from PyPI with::
 14 | 
 15 |     pip install Scrapy-Cookies
 16 | 
 17 | We strongly recommend that you install Scrapy and Scrapy-Cookies in
 18 | :ref:`a dedicated virtualenv <intro-using-virtualenv>`, to avoid conflicting
 19 | with your system packages.
 20 | 
 21 | For more detailed and platform specifics instructions, read on.
 22 | 
 23 | 
 24 | Things that are good to know
 25 | ----------------------------
 26 | 
 27 | Scrapy-Cookies is written in pure Python and depends on a few key Python
 28 | packages (among others):
 29 | 
 30 | * `Scrapy`_, of course
 31 | * `PyMongo`_
 32 | * `redis-py`_
 33 | * `ujson`_
 34 | 
 35 | The minimal versions which Scrapy-Cookies is tested against are:
 36 | 
 37 | * Scrapy 1.5.0
 38 | 
 39 | Scrapy-Cookies may work with older versions of these packages but it is not
 40 | guaranteed it will continue working because it’s not being tested against them.
 41 | 
 42 | .. _Scrapy: https://scrapy.org/
 43 | .. _PyMongo: http://api.mongodb.com/python/current/
 44 | .. _redis-py: https://redis-py.readthedocs.io/en/latest/
 45 | .. _ujson: https://github.com/esnme/ultrajson
 46 | 
 47 | 
 48 | .. _intro-using-virtualenv:
 49 | 
 50 | Using a virtual environment (recommended)
 51 | -----------------------------------------
 52 | 
 53 | TL;DR: We recommend installing Scrapy-Cookies inside a virtual environment on
 54 | all platforms.
 55 | 
 56 | Python packages can be installed either globally (a.k.a system wide), or in
 57 | user-space. We do not recommend installing Scrapy and Scrapy-Cookies
 58 | system wide.
 59 | 
 60 | Instead, we recommend that you install Scrapy and Scrapy-Cookies within a
 61 | so-called "virtual environment" (`virtualenv`_). Virtualenvs allow you to not
 62 | conflict with already-installed Python system packages (which could break some
 63 | of your system tools and scripts), and still install packages normally with
 64 | ``pip`` (without ``sudo`` and the likes).
 65 | 
 66 | To get started with virtual environments, see
 67 | `virtualenv installation instructions`_. To install it globally (having it
 68 | globally installed actually helps here), it should be a matter of running::
 69 | 
 70 |     $ [sudo] pip install virtualenv
 71 | 
 72 | Check this `user guide`_ on how to create your virtualenv.
 73 | 
 74 | .. note::
 75 |     If you use Linux or OS X, `virtualenvwrapper`_ is a handy tool to create
 76 |     virtualenvs.
 77 | 
 78 | Once you have created a virtualenv, you can install Scrapy-Cookies inside it
 79 | with ``pip``, just like any other Python package.
 80 | (See :ref:`platform-specific guides <intro-install-platform-notes>`
 81 | below for non-Python dependencies that you may need to install beforehand).
 82 | 
 83 | Python virtualenvs can be created to use Python 2 by default, or Python 3 by
 84 | default.
 85 | 
 86 | * If you want to install Scrapy-Cookies with Python 3, install Scrapy-Cookies
 87 |   within a Python 3 virtualenv.
 88 | * And if you want to install Scrapy-Cookies with Python 2, install
 89 |   Scrapy-Cookies within a Python 2 virtualenv.
 90 | 
 91 | .. _virtualenv: https://virtualenv.pypa.io
 92 | .. _virtualenv installation instructions: https://virtualenv.pypa.io/en/stable/installation/
 93 | .. _virtualenvwrapper: https://virtualenvwrapper.readthedocs.io/en/latest/install.html
 94 | .. _user guide: https://virtualenv.pypa.io/en/stable/userguide/
 95 | 
 96 | 
 97 | .. _intro-install-platform-notes:
 98 | 
 99 | Platform specific installation notes
100 | ====================================
101 | 
102 | .. _intro-install-windows:
103 | 
104 | Windows
105 | -------
106 | 
107 | Same as Scrapy.
108 | 
109 | 
110 | .. _intro-install-ubuntu:
111 | 
112 | Ubuntu 14.04 or above
113 | ---------------------
114 | 
115 | Same as Scrapy.
116 | 
117 | 
118 | .. _intro-install-macos:
119 | 
120 | Mac OS X
121 | --------
122 | 
123 | Same as Scrapy.
124 | 
125 | 
126 | PyPy
127 | ----
128 | 
129 | Same as Scrapy.
130 | 


--------------------------------------------------------------------------------
/docs/intro/overview.rst:
--------------------------------------------------------------------------------
 1 | .. _intro-overview:
 2 | 
 3 | ==========================
 4 | Scrapy-Cookies at a glance
 5 | ==========================
 6 | 
 7 | Scrapy-Cookies is a downloader middleware for Scrapy.
 8 | 
 9 | Even though Scrapy-Cookies was originally designed for cookies save and restore
10 | (manage the login session), it can also be used to share cookies between various
11 | spider nodes.
12 | 
13 | 
14 | Walk-through of an example spider
15 | =================================
16 | 
17 | In order to show you what Scrapy-Cookies brings to the table, we'll walk you
18 | through an example of a Scrapy project's settings with Scrapy-Cookies using the
19 | simplest way to save and restore the cookies.
20 | 
21 | Here's the code for settings that uses in memory as storage::
22 | 
23 |     DOWNLOADER_MIDDLEWARES.update({
24 |         'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': None,
25 |         'scrapy_cookies.downloadermiddlewares.cookies.CookiesMiddleware': 700,
26 |     })
27 | 
28 |     COOKIES_ENABLED = True
29 | 
30 |     COOKIES_PERSISTENCE = True
31 |     COOKIES_PERSISTENCE_DIR = 'cookies'
32 | 
33 |     # ------------------------------------------------------------------------------
34 |     # IN MEMORY STORAGE
35 |     # ------------------------------------------------------------------------------
36 | 
37 |     COOKIES_STORAGE = 'scrapy_cookies.storage.in_memory.InMemoryStorage'
38 | 
39 | Put this in your project's settings, and run your spider.
40 | 
41 | When this finishes you will have a ``cookies`` file in the folder ``.scrapy``
42 | under your project folder. The file ``cookies`` is the pickled object contained
43 | cookies from your spider.
44 | 
45 | 
46 | What just happened?
47 | -------------------
48 | 
49 | When you run your spider, this middleware initializes all objects related to
50 | maintaining cookies.
51 | 
52 | The crawl starts to send requests and receive responses, at the same time this
53 | middleware extracts and sets the cookies from and to requests and responses.
54 | 
55 | When the spider stopped, this middleware will save the cookies to the path
56 | defined in ``COOKIES_PERSISTENCE_DIR``.
57 | 
58 | 
59 | .. _topics-whatelse:
60 | 
61 | What else?
62 | ==========
63 | 
64 | You've seen how to save and store cookies with Scrapy-Cookies. And this
65 | middleware provides an interface to let you customize your own cookies storage
66 | ways, such as:
67 | 
68 | 
69 | * In-memory storage, with ultra-fast speed to process
70 | 
71 | * SQLite storage, with ultra-fast speed when uses memory database, and easy to
72 |   read and sharing with other process on disk databases
73 | 
74 | * Other database like MongoDB, MySQL, even HBase to integrate with other
75 |   programmes across your
76 | 
77 | 
78 | What's next?
79 | ============
80 | 
81 | The next steps for you are to
82 | :ref:`install Scrapy-Cookies <intro-installation>`,
83 | :ref:`follow through the tutorial <intro-tutorial>` to learn how to create
84 | a project with Scrapy-Cookies and `join the community`_. Thanks for your
85 | interest!
86 | 
87 | .. _join the community: https://scrapy.org/community/
88 | 


--------------------------------------------------------------------------------
/docs/intro/tutorial.rst:
--------------------------------------------------------------------------------
 1 | .. _intro-tutorial:
 2 | 
 3 | =======================
 4 | Scrapy-Cookies Tutorial
 5 | =======================
 6 | 
 7 | In this tutorial, we'll assume that Scrapy-Cookies is already installed on your
 8 | system. If that's not the case, see :ref:`intro-installation`.
 9 | 
10 | This tutorial will walk you through these tasks:
11 | 
12 | 1. Use various storage classes in this middleware
13 | 2. Save cookies on disk
14 | 
15 | 
16 | Use various storage classes in this middleware
17 | ==============================================
18 | 
19 | Before you start scraping, just put the following code into your settings.py::
20 | 
21 |     DOWNLOADER_MIDDLEWARES.update({
22 |         'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': None,
23 |         'scrapy_cookies.downloadermiddlewares.cookies.CookiesMiddleware': 700,
24 |     })
25 | 
26 | With the default settings of this middleware, a in-memory storage will be used.
27 | 
28 | There is a storage named SQLiteStorage. If you want to use it instead of the
29 | in-memory one, simple put the following code below the previous one::
30 | 
31 |     COOKIES_STORAGE = 'scrapy_cookies.storage.sqlite.SQLiteStorage'
32 |     COOKIES_SQLITE_DATABASE = ':memory:'
33 | 
34 | There are other storage classes provided with this middleware, please refer to
35 | :ref:`topics-storage`.
36 | 
37 | When you implement your own storage, you can set ``COOKIES_STORAGE`` to your own
38 | one.
39 | 
40 | 
41 | Save cookies and restore in your next run
42 | =========================================
43 | 
44 | By default this middleware would not save the cookies. When you need to keep
45 | the cookies for further usage, for example a login cookie, you wish to save the
46 | cookies on disk for next run.
47 | 
48 | This middleware provides this ability with one setting::
49 | 
50 |     COOKIES_PERSISTENCE = True
51 | 
52 | Most of time the file saved cookies is named ``cookies`` under the folder
53 | ``.scrapy``. If you want to change it, use this setting::
54 | 
55 |     COOKIES_PERSISTENCE_DIR = 'your-cookies-path'
56 | 
57 | After these settings, this middleware would load the previous saved cookies in
58 | the next run.
59 | 
60 | .. note:: Please keep the storage is the same class when you want save the
61 |   cookies and restore them. The cookies persistence file is not compatible
62 |   between different storage classes.
63 | 
64 | .. note:: This feature depends on the storage class used.
65 | 
66 | Next steps
67 | ==========
68 | 
69 | This tutorial covered only the basics of Scrapy-Cookies, but there's a lot of
70 | other features not mentioned here. Check the :ref:`topics-whatelse` section in
71 | :ref:`intro-overview` chapter for a quick overview of the most important ones.
72 | 
73 | You can continue from the section :ref:`section-basics` to know more about this
74 | middleware, storage and other things this tutorial hasn't covered. If you prefer
75 | to play with an example project, check the :ref:`intro-examples` section.
76 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=Scrapy-Cookies
13 | 
14 | if "%1" == "" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | 	echo.installed, then set the SPHINXBUILD environment variable to point
21 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | 	echo.may add the Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | 
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 | 
35 | :end
36 | popd
37 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | Sphinx==3.2.1
2 | sphinx_rtd_theme
3 | 


--------------------------------------------------------------------------------
/docs/topics/cookiesmiddleware.rst:
--------------------------------------------------------------------------------
 1 | .. _topics-cookiesmiddleware:
 2 | 
 3 | =================
 4 | CookiesMiddleware
 5 | =================
 6 | 
 7 | This is the downloader middleware to inject cookies into requests and extract
 8 | cookies from responses.
 9 | 
10 | This middleware mostly inherits the one from Scrapy, which implements the
11 | interface of `downloader middleware`_. With minimum changes, now
12 | it supports the storage class which implements a certain interface (actually
13 | MutableMapping_).
14 | 
15 | .. _downloader middleware: https://doc.scrapy.org/en/latest/topics/downloader-middleware.html
16 | .. _MutableMapping: https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping
17 | 


--------------------------------------------------------------------------------
/docs/topics/settings.rst:
--------------------------------------------------------------------------------
  1 | .. _topic-settings:
  2 | 
  3 | ========
  4 | Settings
  5 | ========
  6 | 
  7 | The default settings of this middleware keeps the same behaviour as the one in
  8 | Scrapy.
  9 | 
 10 | As an enhancement, there are some settings added in this middleware:
 11 | 
 12 | .. setting:: COOKIES_PERSISTENCE
 13 | 
 14 | COOKIES_PERSISTENCE
 15 | ~~~~~~~~~~~~~~~~~~~
 16 | 
 17 | Default: ``False``
 18 | 
 19 | Whether to enable this cookies middleware save the cookies on disk. If disabled,
 20 | no cookies will be saved on disk.
 21 | 
 22 | Notice that this setting only affects when the storage uses memory as cookies
 23 | container.
 24 | 
 25 | .. setting:: COOKIES_DEBUG
 26 | 
 27 | COOKIES_PERSISTENCE_DIR
 28 | ~~~~~~~~~~~~~~~~~~~~~~~
 29 | 
 30 | Default: ``cookies``
 31 | 
 32 | When ``COOKIES_PERSISTENCE`` is True, the storage which use memory as cookies
 33 | container will save the cookies in the file ``cookies`` under the folder
 34 | ``.scrapy`` in your project, while if the storage does not use memory as cookies
 35 | container will not affect by this setting.
 36 | 
 37 | .. setting:: COOKIES_STORAGE
 38 | 
 39 | COOKIES_STORAGE
 40 | ~~~~~~~~~~~~~~~
 41 | 
 42 | Default: ``scrapy_cookies.storage.in_memory.InMemoryStorage``
 43 | 
 44 | With this setting, the storage can be specified. There are some storage classes
 45 | provided with this middleware by default:
 46 | 
 47 | * :ref:`scrapy_cookies.storage.in_memory.InMemoryStorage<storage-inmemory>`
 48 | * :ref:`scrapy_cookies.storage.sqlite.SQLiteStorage<storage-sqlite>`
 49 | * :ref:`scrapy_cookies.storage.mongo.MongoStorage<storage-mongo>`
 50 | 
 51 | .. setting:: COOKIES_MONGO_MONGOCLIENT_HOST
 52 | 
 53 | COOKIES_MONGO_MONGOCLIENT_HOST
 54 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 55 | 
 56 | Default: ``localhost``
 57 | 
 58 | Hostname or IP address or Unix domain socket path of a single mongod or mongos
 59 | instance to connect to, or a mongodb URI, or a list of hostnames / mongodb URIs.
 60 | If host is an IPv6 literal it must be enclosed in ‘[‘ and ‘]’ characters
 61 | following the RFC2732 URL syntax (e.g. ‘[::1]’ for localhost). Multihomed and
 62 | round robin DNS addresses are not supported.
 63 | 
 64 | Please refer to mongo_client_.
 65 | 
 66 | .. setting:: COOKIES_MONGO_MONGOCLIENT_PORT
 67 | 
 68 | COOKIES_MONGO_MONGOCLIENT_PORT
 69 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 70 | 
 71 | Default: ``27017``
 72 | 
 73 | Port number on which to connect.
 74 | 
 75 | Please refer to mongo_client_.
 76 | 
 77 | .. setting:: COOKIES_MONGO_MONGOCLIENT_DOCUMENT_CLASS
 78 | 
 79 | COOKIES_MONGO_MONGOCLIENT_DOCUMENT_CLASS
 80 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 81 | 
 82 | Default: ``dict``
 83 | 
 84 | Default class to use for documents returned from queries on this client.
 85 | 
 86 | Please refer to mongo_client_.
 87 | 
 88 | .. setting:: COOKIES_MONGO_MONGOCLIENT_TZ_AWARE
 89 | 
 90 | COOKIES_MONGO_MONGOCLIENT_TZ_AWARE
 91 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 92 | 
 93 | Default: ``False``
 94 | 
 95 | If True, datetime instances returned as values in a document by this MongoClient
 96 | will be timezone aware (otherwise they will be naive).
 97 | 
 98 | Please refer to mongo_client_.
 99 | 
100 | .. setting:: COOKIES_MONGO_MONGOCLIENT_CONNECT
101 | 
102 | COOKIES_MONGO_MONGOCLIENT_CONNECT
103 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
104 | 
105 | Default: ``True``
106 | 
107 | If True (the default), immediately begin connecting to MongoDB in the
108 | background. Otherwise connect on the first operation.
109 | 
110 | Please refer to mongo_client_.
111 | 
112 | .. setting:: COOKIES_MONGO_MONGOCLIENT_KWARGS
113 | 
114 | COOKIES_MONGO_MONGOCLIENT_KWARGS
115 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
116 | 
117 | Please refer to mongo_client_.
118 | 
119 | .. setting:: COOKIES_MONGO_DATABASE
120 | 
121 | COOKIES_MONGO_DATABASE
122 | ~~~~~~~~~~~~~~~~~~~~~~
123 | 
124 | Default: ``cookies``
125 | 
126 | The name of the database - a string. If None (the default) the database named in
127 | the MongoDB connection URI is returned.
128 | 
129 | Please refer to get_database_.
130 | 
131 | .. setting:: COOKIES_MONGO_COLLECTION
132 | 
133 | COOKIES_MONGO_COLLECTION
134 | ~~~~~~~~~~~~~~~~~~~~~~~~
135 | 
136 | Default: ``cookies``
137 | 
138 | The name of the collection - a string.
139 | 
140 | Please refer to get_collection_.
141 | 
142 | 
143 | .. _mongo_client: http://api.mongodb.com/python/current/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient
144 | .. _get_database: http://api.mongodb.com/python/current/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient.get_database
145 | .. _get_collection: http://api.mongodb.com/python/current/api/pymongo/database.html#pymongo.database.Database.get_collection
146 | 
147 | 
148 | .. setting:: COOKIES_REDIS_HOST
149 | 
150 | COOKIES_REDIS_HOST
151 | ~~~~~~~~~~~~~~~~~~
152 | 
153 | Please refer to `redis-py's documentation`_.
154 | 
155 | .. setting:: COOKIES_REDIS_PORT
156 | 
157 | COOKIES_REDIS_PORT
158 | ~~~~~~~~~~~~~~~~~~
159 | 
160 | Please refer to `redis-py's documentation`_.
161 | 
162 | .. setting:: COOKIES_REDIS_DB
163 | 
164 | COOKIES_REDIS_DB
165 | ~~~~~~~~~~~~~~~~
166 | 
167 | Please refer to `redis-py's documentation`_.
168 | 
169 | .. setting:: COOKIES_REDIS_PASSWORD
170 | 
171 | COOKIES_REDIS_PASSWORD
172 | ~~~~~~~~~~~~~~~~~~~~~~
173 | 
174 | Please refer to `redis-py's documentation`_.
175 | 
176 | .. setting:: COOKIES_REDIS_SOCKET_TIMEOUT
177 | 
178 | COOKIES_REDIS_SOCKET_TIMEOUT
179 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
180 | 
181 | Please refer to `redis-py's documentation`_.
182 | 
183 | .. setting:: COOKIES_REDIS_SOCKET_CONNECT_TIMEOUT
184 | 
185 | COOKIES_REDIS_SOCKET_CONNECT_TIMEOUT
186 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
187 | 
188 | Please refer to `redis-py's documentation`_.
189 | 
190 | .. setting:: COOKIES_REDIS_SOCKET_KEEPALIVE
191 | 
192 | COOKIES_REDIS_SOCKET_KEEPALIVE
193 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
194 | 
195 | Please refer to `redis-py's documentation`_.
196 | 
197 | .. setting:: COOKIES_REDIS_SOCKET_KEEPALIVE_OPTIONS
198 | 
199 | COOKIES_REDIS_SOCKET_KEEPALIVE_OPTIONS
200 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
201 | 
202 | Please refer to `redis-py's documentation`_.
203 | 
204 | .. setting:: COOKIES_REDIS_CONNECTION_POOL
205 | 
206 | COOKIES_REDIS_CONNECTION_POOL
207 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
208 | 
209 | Please refer to `redis-py's documentation`_.
210 | 
211 | .. setting:: COOKIES_REDIS_UNIX_SOCKET_PATH
212 | 
213 | COOKIES_REDIS_UNIX_SOCKET_PATH
214 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
215 | 
216 | Please refer to `redis-py's documentation`_.
217 | 
218 | .. setting:: COOKIES_REDIS_ENCODING
219 | 
220 | COOKIES_REDIS_ENCODING
221 | ~~~~~~~~~~~~~~~~~~~~~~
222 | 
223 | Please refer to `redis-py's documentation`_.
224 | 
225 | .. setting:: COOKIES_REDIS_ENCODING_ERRORS
226 | 
227 | COOKIES_REDIS_ENCODING_ERRORS
228 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
229 | 
230 | Please refer to `redis-py's documentation`_.
231 | 
232 | .. setting:: COOKIES_REDIS_CHARSET
233 | 
234 | COOKIES_REDIS_CHARSET
235 | ~~~~~~~~~~~~~~~~~~~~~
236 | 
237 | Please refer to `redis-py's documentation`_.
238 | 
239 | .. setting:: COOKIES_REDIS_ERRORS
240 | 
241 | COOKIES_REDIS_ERRORS
242 | ~~~~~~~~~~~~~~~~~~~~
243 | 
244 | Please refer to `redis-py's documentation`_.
245 | 
246 | .. setting:: COOKIES_REDIS_DECODE_RESPONSES
247 | 
248 | COOKIES_REDIS_DECODE_RESPONSES
249 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
250 | 
251 | Please refer to `redis-py's documentation`_.
252 | 
253 | .. setting:: COOKIES_REDIS_RETRY_ON_TIMEOUT
254 | 
255 | COOKIES_REDIS_RETRY_ON_TIMEOUT
256 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
257 | 
258 | Please refer to `redis-py's documentation`_.
259 | 
260 | .. setting:: COOKIES_REDIS_SSL
261 | 
262 | COOKIES_REDIS_SSL
263 | ~~~~~~~~~~~~~~~~~
264 | 
265 | Please refer to `redis-py's documentation`_.
266 | 
267 | .. setting:: COOKIES_REDIS_SSL_KEYFILE
268 | 
269 | COOKIES_REDIS_SSL_KEYFILE
270 | ~~~~~~~~~~~~~~~~~~~~~~~~~
271 | 
272 | Please refer to `redis-py's documentation`_.
273 | 
274 | .. setting:: COOKIES_REDIS_SSL_CERTFILE
275 | 
276 | COOKIES_REDIS_SSL_CERTFILE
277 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
278 | 
279 | Please refer to `redis-py's documentation`_.
280 | 
281 | .. setting:: COOKIES_REDIS_SSL_CERT_REQS
282 | 
283 | COOKIES_REDIS_SSL_CERT_REQS
284 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
285 | 
286 | Please refer to `redis-py's documentation`_.
287 | 
288 | .. setting:: COOKIES_REDIS_SSL_CA_CERTS
289 | 
290 | COOKIES_REDIS_SSL_CA_CERTS
291 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
292 | 
293 | Please refer to `redis-py's documentation`_.
294 | 
295 | .. setting:: COOKIES_REDIS_MAX_CONNECTIONS
296 | 
297 | COOKIES_REDIS_MAX_CONNECTIONS
298 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
299 | 
300 | Please refer to `redis-py's documentation`_.
301 | 
302 | .. _redis-py's documentation: https://redis-py.readthedocs.io/en/latest/
303 | 


--------------------------------------------------------------------------------
/docs/topics/storage.rst:
--------------------------------------------------------------------------------
  1 | .. _topics-storage:
  2 | 
  3 | =======
  4 | Storage
  5 | =======
  6 | 
  7 | The class of storage is the one implementing MutableMapping_ interface. There
  8 | are some storage classes provided with this middleware:
  9 | 
 10 | .. _MutableMapping: https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping
 11 | 
 12 | .. _storage-inmemory:
 13 | 
 14 | InMemoryStorage
 15 | ---------------
 16 | 
 17 | .. module:: scrapy_cookies.storage.in_memory
 18 |    :synopsis: In Memory Storage
 19 | 
 20 | .. class:: InMemoryStorage
 21 | 
 22 |    This storage enables keeping cookies inside the memory, to provide ultra fast
 23 |    read and write cookies performance.
 24 | 
 25 | .. _storage-sqlite:
 26 | 
 27 | SQLiteStorage
 28 | -------------
 29 | 
 30 | .. module:: scrapy_cookies.storage.sqlite
 31 |    :synopsis: SQLite Storage
 32 | 
 33 | .. class:: SQLiteStorage
 34 | 
 35 |    This storage enables keeping cookies in SQLite, which supports already by
 36 |    Python.
 37 | 
 38 | The following settings can be used to configure this storage:
 39 | 
 40 | * |COOKIES_SQLITE_DATABASE|_
 41 | 
 42 | .. |COOKIES_SQLITE_DATABASE| replace:: ``COOKIES_SQLITE_DATABASE``
 43 | .. _COOKIES_SQLITE_DATABASE: https://docs.python.org/3/library/sqlite3.html#sqlite3.connect
 44 | 
 45 | .. _storage-mongo:
 46 | 
 47 | MongoStorage
 48 | ------------
 49 | 
 50 | .. module:: scrapy_cookies.storage.mongo
 51 |    :synopsis: Mongo Storage
 52 | 
 53 | .. class:: MongoStorage
 54 | 
 55 |    This storage enables keeping cookies in MongoDB.
 56 | 
 57 | The following settings can be used to configure this storage:
 58 | 
 59 | * :setting:`COOKIES_MONGO_MONGOCLIENT_HOST`
 60 | * :setting:`COOKIES_MONGO_MONGOCLIENT_PORT`
 61 | * :setting:`COOKIES_MONGO_MONGOCLIENT_DOCUMENT_CLASS`
 62 | * :setting:`COOKIES_MONGO_MONGOCLIENT_TZ_AWARE`
 63 | * :setting:`COOKIES_MONGO_MONGOCLIENT_CONNECT`
 64 | * :setting:`COOKIES_MONGO_MONGOCLIENT_KWARGS`
 65 | * :setting:`COOKIES_MONGO_DATABASE`
 66 | * :setting:`COOKIES_MONGO_COLLECTION`
 67 | 
 68 | .. _storage-redis:
 69 | 
 70 | RedisStorage
 71 | ------------
 72 | 
 73 | .. module:: scrapy_cookies.storage.redis
 74 |    :synopsis: Redis Storage
 75 | 
 76 | .. class:: RedisStorage
 77 | 
 78 |    This storage enables keeping cookies in Redis.
 79 | 
 80 | The following settings can be used to configure this storage:
 81 | 
 82 | * :setting:`COOKIES_REDIS_HOST`
 83 | * :setting:`COOKIES_REDIS_PORT`
 84 | * :setting:`COOKIES_REDIS_DB`
 85 | * :setting:`COOKIES_REDIS_PASSWORD`
 86 | * :setting:`COOKIES_REDIS_SOCKET_TIMEOUT`
 87 | * :setting:`COOKIES_REDIS_SOCKET_CONNECT_TIMEOUT`
 88 | * :setting:`COOKIES_REDIS_SOCKET_KEEPALIVE`
 89 | * :setting:`COOKIES_REDIS_SOCKET_KEEPALIVE_OPTIONS`
 90 | * :setting:`COOKIES_REDIS_CONNECTION_POOL`
 91 | * :setting:`COOKIES_REDIS_UNIX_SOCKET_PATH`
 92 | * :setting:`COOKIES_REDIS_ENCODING`
 93 | * :setting:`COOKIES_REDIS_ENCODING_ERRORS`
 94 | * :setting:`COOKIES_REDIS_CHARSET`
 95 | * :setting:`COOKIES_REDIS_ERRORS`
 96 | * :setting:`COOKIES_REDIS_DECODE_RESPONSES`
 97 | * :setting:`COOKIES_REDIS_RETRY_ON_TIMEOUT`
 98 | * :setting:`COOKIES_REDIS_SSL`
 99 | * :setting:`COOKIES_REDIS_SSL_KEYFILE`
100 | * :setting:`COOKIES_REDIS_SSL_CERTFILE`
101 | * :setting:`COOKIES_REDIS_SSL_CERT_REQS`
102 | * :setting:`COOKIES_REDIS_SSL_CA_CERTS`
103 | * :setting:`COOKIES_REDIS_MAX_CONNECTIONS`
104 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts =
3 |     --cov=scrapy_cookies
4 |     --cov-report=html
5 |     --cov-report=term
6 |     --docker-compose=tests/test_storages/docker-compose.yml
7 |     --docker-compose-remove-volumes
8 | testpaths = tests
9 | 


--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 |   "extends": [
3 |     "config:base"
4 |   ]
5 | }
6 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | hiredis
2 | pymongo
3 | redis
4 | scrapy
5 | six
6 | ujson
7 | 


--------------------------------------------------------------------------------
/scrapy_cookies/VERSION:
--------------------------------------------------------------------------------
1 | 0.3
2 | 


--------------------------------------------------------------------------------
/scrapy_cookies/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Scrapy-Cookies - A middleware of cookies persistence for Scrapy
 3 | """
 4 | 
 5 | __all__ = ["__version__", "version_info"]
 6 | 
 7 | # Scrapy version
 8 | import pkgutil
 9 | 
10 | __version__ = pkgutil.get_data(__package__, "VERSION").decode("ascii").strip()
11 | version_info = tuple(int(v) if v.isdigit() else v for v in __version__.split("."))
12 | del pkgutil
13 | 


--------------------------------------------------------------------------------
/scrapy_cookies/downloadermiddlewares/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scrapedia/scrapy-cookies/342eaada3b84db4971be09862c34db9f207c0fb7/scrapy_cookies/downloadermiddlewares/__init__.py


--------------------------------------------------------------------------------
/scrapy_cookies/downloadermiddlewares/cookies.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from http.cookiejar import Cookie
  3 | from typing import Dict, List
  4 | 
  5 | from scrapy.crawler import Crawler
  6 | from scrapy.exceptions import NotConfigured
  7 | from scrapy.http import Request, Response
  8 | from scrapy.http.cookies import CookieJar
  9 | from scrapy.settings import SETTINGS_PRIORITIES, Settings
 10 | from scrapy.signals import spider_closed, spider_opened
 11 | from scrapy.spiders import Spider
 12 | from scrapy.utils.misc import load_object
 13 | try:
 14 |     from scrapy.utils.python import to_native_str
 15 | except ImportError:
 16 |     # to_native_str is deprecated since version 2.8
 17 |     # https://docs.scrapy.org/en/2.8/news.html#deprecation-removals
 18 |     from scrapy.utils.python import to_unicode as to_native_str
 19 | 
 20 | from scrapy_cookies.settings import default_settings, unfreeze_settings
 21 | 
 22 | logger = logging.getLogger(__name__)
 23 | 
 24 | 
 25 | def format_cookie(cookie: Dict) -> str:
 26 |     # build cookie string
 27 |     cookie_str: str = "{}={}".format(cookie["name"], cookie["value"])
 28 | 
 29 |     if cookie.get("path", None):
 30 |         cookie_str += "; Path={}".format(cookie["path"])
 31 |     if cookie.get("domain", None):
 32 |         cookie_str += "; Domain={}".format(cookie["domain"])
 33 | 
 34 |     return cookie_str
 35 | 
 36 | 
 37 | def get_request_cookies(jar: CookieJar, request: Request) -> List[Cookie]:
 38 |     if isinstance(request.cookies, dict):
 39 |         cookie_list: List[Dict] = [
 40 |             {"name": k, "value": v} for k, v in request.cookies.items()
 41 |         ]
 42 |     else:
 43 |         cookie_list: List[Dict] = request.cookies
 44 | 
 45 |     cookies: List[str] = [format_cookie(x) for x in cookie_list]
 46 |     headers: Dict[str, List[str]] = {"Set-Cookie": cookies}
 47 |     response: Response = Response(request.url, headers=headers)
 48 | 
 49 |     return jar.make_cookies(response, request)
 50 | 
 51 | 
 52 | class CookiesMiddleware:
 53 |     """This middleware enables working with sites that need cookies"""
 54 | 
 55 |     def __init__(self, settings: Settings):
 56 |         self.settings: Settings = settings
 57 |         self.jars = load_object(settings["COOKIES_STORAGE"]).from_middleware(self)
 58 |         self.debug: bool = settings["COOKIES_DEBUG"]
 59 | 
 60 |     @classmethod
 61 |     def from_crawler(cls, crawler: Crawler):
 62 |         with unfreeze_settings(crawler.settings) as settings:
 63 |             settings.setmodule(
 64 |                 module=default_settings, priority=SETTINGS_PRIORITIES["default"]
 65 |             )
 66 |         if not crawler.settings.getbool("COOKIES_ENABLED"):
 67 |             raise NotConfigured
 68 |         obj = cls(crawler.settings)
 69 |         crawler.signals.connect(obj.spider_opened, signal=spider_opened)
 70 |         crawler.signals.connect(obj.spider_closed, signal=spider_closed)
 71 |         return obj
 72 | 
 73 |     def spider_opened(self, spider: Spider):
 74 |         logger.info(
 75 |             "%s is used as the cookies storage.", self.settings["COOKIES_STORAGE"]
 76 |         )
 77 |         self.jars.open_spider(spider)
 78 | 
 79 |     def spider_closed(self, spider: Spider):
 80 |         self.jars.close_spider(spider)
 81 | 
 82 |     def process_request(self, request: Request, spider: Spider) -> None:
 83 |         if request.meta.get("dont_merge_cookies", False):
 84 |             return
 85 | 
 86 |         cookiejar_key = request.meta.get("cookiejar")
 87 |         jar: CookieJar = self.jars[cookiejar_key]
 88 |         cookies: List[Cookie] = get_request_cookies(jar, request)
 89 |         for cookie in cookies:
 90 |             jar.set_cookie_if_ok(cookie, request)
 91 |         self.jars[cookiejar_key] = jar
 92 | 
 93 |         # set Cookie header
 94 |         request.headers.pop("Cookie", None)
 95 |         jar.add_cookie_header(request)
 96 |         self._debug_cookie(request, spider)
 97 | 
 98 |     def process_response(
 99 |         self, request: Request, response: Response, spider: Spider
100 |     ) -> Response:
101 |         if request.meta.get("dont_merge_cookies", False):
102 |             return response
103 | 
104 |         # extract cookies from Set-Cookie and drop invalid/expired cookies
105 |         cookiejar_key = request.meta.get("cookiejar")
106 |         jar: CookieJar = self.jars[cookiejar_key]
107 |         jar.extract_cookies(response, request)
108 |         self.jars[cookiejar_key] = jar
109 |         self._debug_set_cookie(response, spider)
110 | 
111 |         return response
112 | 
113 |     def _debug_cookie(self, request: Request, spider: Spider):
114 |         if self.debug:
115 |             cl = [
116 |                 to_native_str(c, errors="replace")
117 |                 for c in request.headers.getlist("Cookie")
118 |             ]
119 |             if cl:
120 |                 cookies: str = "\n".join("Cookie: {}\n".format(c) for c in cl)
121 |                 msg: str = "Sending cookies to: {}\n{}".format(request, cookies)
122 |                 logger.debug(msg, extra={"spider": spider})
123 | 
124 |     def _debug_set_cookie(self, response: Response, spider: Spider):
125 |         if self.debug:
126 |             cl = [
127 |                 to_native_str(c, errors="replace")
128 |                 for c in response.headers.getlist("Set-Cookie")
129 |             ]
130 |             if cl:
131 |                 cookies: str = "\n".join("Set-Cookie: {}\n".format(c) for c in cl)
132 |                 msg: str = "Received cookies from: {}\n{}".format(response, cookies)
133 |                 logger.debug(msg, extra={"spider": spider})
134 | 


--------------------------------------------------------------------------------
/scrapy_cookies/settings/__init__.py:
--------------------------------------------------------------------------------
 1 | from contextlib import contextmanager
 2 | 
 3 | 
 4 | @contextmanager
 5 | def unfreeze_settings(settings):
 6 |     original_status = settings.frozen
 7 |     settings.frozen = False
 8 |     try:
 9 |         yield settings
10 |     finally:
11 |         settings.frozen = original_status
12 | 


--------------------------------------------------------------------------------
/scrapy_cookies/settings/default_settings.py:
--------------------------------------------------------------------------------
 1 | COOKIES_ENABLED = True
 2 | COOKIES_DEBUG = False
 3 | 
 4 | COOKIES_PERSISTENCE = False
 5 | COOKIES_PERSISTENCE_DIR = "cookies"
 6 | 
 7 | # ------------------------------------------------------------------------------
 8 | # IN MEMORY STORAGE
 9 | # ------------------------------------------------------------------------------
10 | 
11 | COOKIES_STORAGE = "scrapy_cookies.storage.in_memory.InMemoryStorage"
12 | 
13 | # ------------------------------------------------------------------------------
14 | # SQLITE STORAGE
15 | # ------------------------------------------------------------------------------
16 | 
17 | # COOKIES_STORAGE = 'scrapy_cookies.storage.sqlite.SQLiteStorage'
18 | COOKIES_SQLITE_DATABASE = ":memory:"
19 | 
20 | # ------------------------------------------------------------------------------
21 | # MONGODB
22 | # ------------------------------------------------------------------------------
23 | 
24 | # http://api.mongodb.com/python/current/api/pymongo/mongo_client.html#pymongo.mongo_client.MongoClient
25 | 
26 | # COOKIES_STORAGE = 'scrapy_cookies.storage.mongo.MongoStorage'
27 | COOKIES_MONGO_MONGOCLIENT_HOST = "localhost"
28 | COOKIES_MONGO_MONGOCLIENT_PORT = 27017
29 | COOKIES_MONGO_MONGOCLIENT_DOCUMENT_CLASS = dict
30 | COOKIES_MONGO_MONGOCLIENT_TZ_AWARE = False
31 | COOKIES_MONGO_MONGOCLIENT_CONNECT = True
32 | 
33 | COOKIES_MONGO_MONGOCLIENT_KWARGS = {
34 |     # 'username': 'username',
35 |     # 'password': 'password',
36 |     # 'authSource': 'admin',
37 |     # 'authMechanism': 'SCRAM-SHA-1',
38 | }
39 | 
40 | COOKIES_MONGO_DATABASE = "cookies"
41 | # or
42 | # COOKIES_MONGO_DATABASE = {
43 | #     'name': 'cookies',
44 | #     'codec_options': None,
45 | #     'read_preference': None,
46 | #     'write_concern': None,
47 | #     'read_concern': None
48 | # }
49 | 
50 | COOKIES_MONGO_COLLECTION = "cookies"
51 | # or
52 | # COOKIES_MONGO_COLLECTION = {
53 | #     'name': 'cookies',
54 | #     'codec_options': None,
55 | #     'read_preference': None,
56 | #     'write_concern': None,
57 | #     'read_concern': None
58 | # }
59 | 
60 | # ------------------------------------------------------------------------------
61 | # REDIS STORAGE
62 | # ------------------------------------------------------------------------------
63 | 
64 | # COOKIES_STORAGE = 'scrapy_cookies.storage.redis.RedisStorage'
65 | COOKIES_REDIS_HOST = "localhost"
66 | COOKIES_REDIS_PORT = 6379
67 | COOKIES_REDIS_DB = 0
68 | COOKIES_REDIS_PASSWORD = None
69 | COOKIES_REDIS_SOCKET_TIMEOUT = None
70 | COOKIES_REDIS_SOCKET_CONNECT_TIMEOUT = None
71 | COOKIES_REDIS_SOCKET_KEEPALIVE = None
72 | COOKIES_REDIS_SOCKET_KEEPALIVE_OPTIONS = None
73 | COOKIES_REDIS_CONNECTION_POOL = None
74 | COOKIES_REDIS_UNIX_SOCKET_PATH = None
75 | COOKIES_REDIS_ENCODING = "utf-8"
76 | COOKIES_REDIS_ENCODING_ERRORS = "strict"
77 | COOKIES_REDIS_CHARSET = None
78 | COOKIES_REDIS_ERRORS = None
79 | COOKIES_REDIS_DECODE_RESPONSES = False
80 | COOKIES_REDIS_RETRY_ON_TIMEOUT = False
81 | COOKIES_REDIS_SSL = False
82 | COOKIES_REDIS_SSL_KEYFILE = None
83 | COOKIES_REDIS_SSL_CERTFILE = None
84 | COOKIES_REDIS_SSL_CERT_REQS = None
85 | COOKIES_REDIS_SSL_CA_CERTS = None
86 | COOKIES_REDIS_MAX_CONNECTIONS = None
87 | 


--------------------------------------------------------------------------------
/scrapy_cookies/signals.py:
--------------------------------------------------------------------------------
1 | """
2 | Scrapy-Cookies signals
3 | 
4 | These signals are documented in docs/topics/signals.rst. Please don't add new
5 | signals here without documenting them there.
6 | """
7 | 
8 | cookies_invalidated = object()
9 | 


--------------------------------------------------------------------------------
/scrapy_cookies/storage/__init__.py:
--------------------------------------------------------------------------------
 1 | from collections.abc import MutableMapping
 2 | 
 3 | from scrapy.settings import Settings
 4 | from scrapy.spiders import Spider
 5 | 
 6 | from scrapy_cookies.downloadermiddlewares.cookies import CookiesMiddleware
 7 | 
 8 | 
 9 | class BaseStorage(MutableMapping):
10 |     name = None
11 | 
12 |     def __init__(self, settings: Settings):
13 |         self.settings: Settings = settings
14 | 
15 |     @classmethod
16 |     def from_middleware(cls, middleware: CookiesMiddleware):
17 |         obj = cls(middleware.settings)
18 |         return obj
19 | 
20 |     def open_spider(self, spider: Spider):
21 |         pass
22 | 
23 |     def close_spider(self, spider: Spider):
24 |         pass
25 | 
26 |     def __delitem__(self, v):
27 |         pass
28 | 
29 |     def __getitem__(self, k):
30 |         pass
31 | 
32 |     def __iter__(self):
33 |         pass
34 | 
35 |     def __len__(self):
36 |         pass
37 | 
38 |     def __setitem__(self, k, v):
39 |         pass
40 | 


--------------------------------------------------------------------------------
/scrapy_cookies/storage/in_memory.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import logging
 3 | import os
 4 | import pickle
 5 | from collections import UserDict
 6 | from typing import Dict
 7 | 
 8 | from scrapy.http.cookies import CookieJar
 9 | from scrapy.settings import Settings
10 | from scrapy.spiders import Spider
11 | from scrapy.utils.project import data_path
12 | 
13 | from scrapy_cookies.storage import BaseStorage
14 | 
15 | logger = logging.getLogger(__name__)
16 | 
17 | 
18 | class InMemoryStorage(UserDict, BaseStorage):
19 |     def __init__(self, settings: Settings):
20 |         super(InMemoryStorage, self).__init__()
21 |         self.settings: Settings = settings
22 |         self.cookies_dir: str = data_path(settings["COOKIES_PERSISTENCE_DIR"])
23 | 
24 |     def open_spider(self, spider: Spider):
25 |         logger.info("COOKIES_PERSISTENCE is %s.", self.settings["COOKIES_PERSISTENCE"])
26 |         if not self.settings["COOKIES_PERSISTENCE"]:
27 |             return
28 |         if not os.path.exists(self.cookies_dir):
29 |             logger.info("Cookies dir does not exist.")
30 |             return
31 |         with io.open(self.cookies_dir, "br") as f:
32 |             self.data: Dict = pickle.load(f)
33 |             logger.info("The number of restored cookies is %d.", len(self.data))
34 | 
35 |     def close_spider(self, spider: Spider):
36 |         if self.settings["COOKIES_PERSISTENCE"]:
37 |             with io.open(self.cookies_dir, "bw") as f:
38 |                 pickle.dump(self.data, f)
39 |                 logger.info("The number of saved cookies is %d.", len(self.data))
40 | 
41 |     def __missing__(self, key) -> CookieJar:
42 |         self.data.update({key: CookieJar()})
43 |         return self.data[key]
44 | 


--------------------------------------------------------------------------------
/scrapy_cookies/storage/mongo.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import pickle
  3 | import re
  4 | from http.cookiejar import Cookie
  5 | from itertools import starmap
  6 | from typing import Dict
  7 | 
  8 | import pymongo
  9 | from pymongo import MongoClient
 10 | from pymongo.collection import Collection
 11 | from pymongo.database import Database
 12 | from scrapy.http.cookies import CookieJar
 13 | from scrapy.settings import Settings
 14 | from scrapy.spiders import Spider
 15 | 
 16 | from scrapy_cookies.storage import BaseStorage
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | pattern = re.compile("^COOKIES_MONGO_MONGOCLIENT_(?P<kwargs>(?!KWARGS).*)$")
 20 | 
 21 | 
 22 | def get_arguments(var):
 23 |     return {str: {"name": var}, dict: var}[type(var)]
 24 | 
 25 | 
 26 | def write_cookiejar(cookiejar: CookieJar):
 27 |     return pickle.dumps(cookiejar)
 28 | 
 29 | 
 30 | def read_cookiejar(document):
 31 |     try:
 32 |         return pickle.loads(document["cookiejar"])
 33 |     except TypeError:
 34 |         return None
 35 | 
 36 | 
 37 | def convert_cookiejar(cookiejar):
 38 |     def _convert_cookies(x):
 39 |         if isinstance(x, (str, int, bool)):
 40 |             return x
 41 |         elif isinstance(x, Cookie):
 42 |             return dict(
 43 |                 map(
 44 |                     lambda attr: (attr, getattr(x, attr)),
 45 |                     (
 46 |                         "version",
 47 |                         "name",
 48 |                         "value",
 49 |                         "port",
 50 |                         "port_specified",
 51 |                         "domain",
 52 |                         "domain_specified",
 53 |                         "domain_initial_dot",
 54 |                         "path",
 55 |                         "path_specified",
 56 |                         "secure",
 57 |                         "expires",
 58 |                         "discard",
 59 |                         "comment",
 60 |                         "comment_url",
 61 |                     ),
 62 |                 )
 63 |             )
 64 | 
 65 |         elif isinstance(x, dict):
 66 |             return dict(
 67 |                 starmap(
 68 |                     lambda k, v: (_convert_cookies(k), _convert_cookies(v)), x.items()
 69 |                 )
 70 |             )
 71 | 
 72 |     return _convert_cookies(cookiejar._cookies)
 73 | 
 74 | 
 75 | class MongoStorage(BaseStorage):
 76 |     def __init__(self, settings: Settings):
 77 |         super(MongoStorage, self).__init__(settings)
 78 |         self.mongo_settings: Dict[str, str] = dict(
 79 |             starmap(
 80 |                 lambda k, v: (pattern.sub(lambda x: x.group(1).lower(), k), v),
 81 |                 filter(
 82 |                     lambda pair: pattern.match(pair[0]), settings.copy_to_dict().items()
 83 |                 ),
 84 |             )
 85 |         )
 86 |         self.mongo_settings.update(self.settings["COOKIES_MONGO_MONGOCLIENT_KWARGS"])
 87 |         self.client: MongoClient = None
 88 |         self.db: Database = None
 89 |         self.coll: Collection = None
 90 | 
 91 |     @classmethod
 92 |     def from_middleware(cls, middleware):
 93 |         obj = cls(middleware.settings)
 94 |         return obj
 95 | 
 96 |     def open_spider(self, spider: Spider):
 97 |         self.client: MongoClient = MongoClient(**self.mongo_settings)
 98 | 
 99 |         self.db: Database = self.client.get_database(
100 |             **get_arguments(self.settings["COOKIES_MONGO_DATABASE"])
101 |         )
102 |         self.coll: Collection = self.db.get_collection(
103 |             **get_arguments(self.settings["COOKIES_MONGO_COLLECTION"])
104 |         )
105 |         self.coll.create_index([("key", pymongo.ASCENDING)], unique=True)
106 | 
107 |     def close_spider(self, spider: Spider):
108 |         self.client.close()
109 | 
110 |     def __missing__(self, k) -> CookieJar:
111 |         cookiejar: CookieJar = CookieJar()
112 |         self[k] = cookiejar
113 |         return cookiejar
114 | 
115 |     def __delitem__(self, v):
116 |         # TODO: finish this method
117 |         self.coll.delete_one({})
118 | 
119 |     def __getitem__(self, k) -> CookieJar:
120 |         v: CookieJar = read_cookiejar(self.coll.find_one({"key": k}))
121 |         if isinstance(v, CookieJar):
122 |             return v
123 |         if hasattr(self.__class__, "__missing__"):
124 |             return self.__class__.__missing__(self, k)
125 |         raise KeyError(k)
126 | 
127 |     def __iter__(self):
128 |         return iter(self.coll.find())
129 | 
130 |     def __len__(self) -> int:
131 |         return self.coll.count_documents({})
132 | 
133 |     def __setitem__(self, k, v):
134 |         self.coll.update_one(
135 |             {"key": k},
136 |             {
137 |                 "$set": {
138 |                     "key": k,
139 |                     "cookiejar": write_cookiejar(v),
140 |                     "cookies": convert_cookiejar(v),
141 |                 }
142 |             },
143 |             upsert=True,
144 |         )
145 | 


--------------------------------------------------------------------------------
/scrapy_cookies/storage/redis_.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import pickle
 3 | import re
 4 | from itertools import starmap
 5 | from typing import Dict
 6 | 
 7 | import ujson
 8 | from redis.client import Redis
 9 | from scrapy.http.cookies import CookieJar
10 | from scrapy.settings import Settings
11 | from scrapy.spiders import Spider
12 | 
13 | from scrapy_cookies.storage import BaseStorage
14 | 
15 | logger = logging.getLogger(__name__)
16 | pattern = re.compile("^COOKIES_REDIS_(?P<kwargs>(?!KWARGS).*)$")
17 | 
18 | 
19 | def get_arguments(var):
20 |     return {str: {"name": var}, dict: var}[type(var)]
21 | 
22 | 
23 | def write_cookiejar(cookiejar):
24 |     return {
25 |         "cookiejar": pickle.dumps(cookiejar),
26 |         "cookies": ujson.dumps(cookiejar._cookies),
27 |     }
28 | 
29 | 
30 | def read_cookiejar(document):
31 |     try:
32 |         return pickle.loads(document["cookiejar"])
33 |     except (TypeError, KeyError):
34 |         return None
35 | 
36 | 
37 | class RedisStorage(BaseStorage):
38 |     def __init__(self, settings: Settings):
39 |         super(RedisStorage, self).__init__(settings)
40 |         self.redis_settings: Dict[str, str] = dict(
41 |             starmap(
42 |                 lambda k, v: (pattern.sub(lambda x: x.group(1).lower(), k), v),
43 |                 filter(
44 |                     lambda pair: pattern.match(pair[0]), settings.copy_to_dict().items()
45 |                 ),
46 |             )
47 |         )
48 |         self.r: Redis = None
49 | 
50 |     @classmethod
51 |     def from_middleware(cls, middleware):
52 |         obj = cls(middleware.settings)
53 |         return obj
54 | 
55 |     def open_spider(self, spider: Spider):
56 |         self.r: Redis = Redis(**self.redis_settings)
57 | 
58 |     def close_spider(self, spider: Spider):
59 |         pass
60 | 
61 |     def __missing__(self, k) -> CookieJar:
62 |         cookiejar: CookieJar = CookieJar()
63 |         self[k] = cookiejar
64 |         return cookiejar
65 | 
66 |     def __delitem__(self, v):
67 |         self.r.delete(v)
68 | 
69 |     def __getitem__(self, k) -> CookieJar:
70 |         v: CookieJar = read_cookiejar(self.r.hgetall(k))
71 |         if isinstance(v, CookieJar):
72 |             return v
73 |         if hasattr(self.__class__, "__missing__"):
74 |             return self.__class__.__missing__(self, k)
75 |         raise KeyError(k)
76 | 
77 |     def __iter__(self):
78 |         return self.r.scan_iter()
79 | 
80 |     def __len__(self) -> int:
81 |         return self.r.dbsize()
82 | 
83 |     def __setitem__(self, k, v: CookieJar):
84 |         self.r.hmset(name=k, mapping=write_cookiejar(v))
85 | 


--------------------------------------------------------------------------------
/scrapy_cookies/storage/sqlite.py:
--------------------------------------------------------------------------------
  1 | import io
  2 | import logging
  3 | import os
  4 | import pickle
  5 | import sqlite3
  6 | from sqlite3 import Connection, Cursor, Row
  7 | 
  8 | from scrapy.http.cookies import CookieJar
  9 | from scrapy.settings import Settings
 10 | from scrapy.spiders import Spider
 11 | from scrapy.utils.project import data_path
 12 | 
 13 | from scrapy_cookies.storage import BaseStorage
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | def adapt_cookiejar(cookiejar: CookieJar) -> bytes:
 19 |     return pickle.dumps(cookiejar)
 20 | 
 21 | 
 22 | def convert_cookiejar_and_its_key(cookiejar_or_its_key: bytes):
 23 |     return pickle.loads(cookiejar_or_its_key)
 24 | 
 25 | 
 26 | sqlite3.register_adapter(CookieJar, adapt_cookiejar)
 27 | sqlite3.register_converter("cookiejar", convert_cookiejar_and_its_key)
 28 | sqlite3.register_converter("cookiejar_key", convert_cookiejar_and_its_key)
 29 | 
 30 | 
 31 | class SQLiteStorage(BaseStorage):
 32 |     def __init__(self, settings: Settings):
 33 |         super(SQLiteStorage, self).__init__(settings)
 34 |         self.cookies_dir: str = data_path(settings["COOKIES_PERSISTENCE_DIR"])
 35 |         self.database: str = settings["COOKIES_SQLITE_DATABASE"]
 36 |         self.conn: Connection = None
 37 |         self.cur: Cursor = None
 38 | 
 39 |     def open_spider(self, spider: Spider):
 40 |         self.conn: Connection = sqlite3.connect(
 41 |             self.database, detect_types=sqlite3.PARSE_COLNAMES, isolation_level=None
 42 |         )
 43 |         self.conn.row_factory = sqlite3.Row
 44 |         self.cur: Cursor = self.conn.cursor()
 45 |         if self.database == ":memory:":
 46 |             if self.settings["COOKIES_PERSISTENCE"] and os.path.isfile(
 47 |                 self.cookies_dir
 48 |             ):
 49 |                 with io.open(self.cookies_dir, "r") as f:
 50 |                     self.cur.executescript(f.read())
 51 |                 return
 52 |         self.cur.execute(
 53 |             "CREATE TABLE IF NOT EXISTS cookies ("
 54 |             "cookiejar_key BLOB PRIMARY KEY UNIQUE, cookiejar BLOB, str TEXT"
 55 |             ")"
 56 |         )
 57 | 
 58 |     def close_spider(self, spider: Spider):
 59 |         if self.database == ":memory:" and self.settings["COOKIES_PERSISTENCE"]:
 60 |             with open(self.cookies_dir, "w") as f:
 61 |                 for line in self.conn.iterdump():
 62 |                     f.write("%s\n" % line)
 63 |         self.conn.close()
 64 | 
 65 |     def __delitem__(self, v):
 66 |         self.cur.execute("DELETE FROM cookies WHERE cookiejar_key=?", pickle.dumps(v))
 67 | 
 68 |     def __getitem__(self, k) -> CookieJar:
 69 |         result: Row = self.cur.execute(
 70 |             'SELECT cookiejar as "cookiejar [CookieJar]" '
 71 |             "FROM cookies "
 72 |             "WHERE cookiejar_key=?",
 73 |             (pickle.dumps(k),),
 74 |         ).fetchone()
 75 |         if result:
 76 |             return result["cookiejar"]
 77 |         if hasattr(self.__class__, "__missing__"):
 78 |             return self.__class__.__missing__(self, k)
 79 |         raise KeyError(k)
 80 | 
 81 |     def __iter__(self):
 82 |         return iter(
 83 |             self.cur.execute(
 84 |                 'SELECT cookiejar_key as "cookiejar_key [CookieJar_key]", cookiejar as "cookiejar [CookieJar]" '
 85 |                 "FROM cookies"
 86 |             ).fetchall()
 87 |         )
 88 | 
 89 |     def __len__(self) -> int:
 90 |         return self.cur.execute("SELECT COUNT(*) FROM cookies").fetchone()[0]
 91 | 
 92 |     def __setitem__(self, k, v: CookieJar) -> None:
 93 |         self.cur.execute(
 94 |             "INSERT OR REPLACE INTO cookies (cookiejar_key, cookiejar, str) VALUES (?, ?, ?)",
 95 |             (pickle.dumps(k), v, str(k)),
 96 |         )
 97 | 
 98 |     def __missing__(self, k) -> CookieJar:
 99 |         v: CookieJar = CookieJar()
100 |         self.__setitem__(k, v)
101 |         return v
102 | 
103 |     def __contains__(self, k) -> bool:
104 |         self.cur.execute(
105 |             'SELECT cookiejar as "cookiejar [CookieJar]" '
106 |             "FROM cookies "
107 |             "WHERE cookiejar_key=?",
108 |             (pickle.dumps(k),),
109 |         )
110 |         return bool(self.cur.fetchone())
111 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_rpm]
2 | doc_files = docs AUTHORS INSTALL LICENSE README.rst
3 | 
4 | [bdist_wheel]
5 | universal=1
6 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from os.path import dirname, join
 2 | 
 3 | from setuptools import find_packages, setup
 4 | 
 5 | with open(join(dirname(__file__), "scrapy_cookies/VERSION"), "rb") as f:
 6 |     version = f.read().decode("ascii").strip()
 7 | 
 8 | 
 9 | extras_require = {}
10 | 
11 | setup(
12 |     name="Scrapy-Cookies",
13 |     version=version,
14 |     url="https://github.com/grammy-jiang/scrapy-cookies",
15 |     description="A middleware of cookies persistence for Scrapy",
16 |     long_description=open("README.rst").read(),
17 |     author="Scrapedia",
18 |     author_email="Scrapedia@outlook.com",
19 |     maintainer="Scrapedia",
20 |     maintainer_email="Scrapedia@outlook.com",
21 |     license="BSD",
22 |     packages=find_packages(exclude=("tests", "tests.*")),
23 |     include_package_data=True,
24 |     zip_safe=False,
25 |     classifiers=[
26 |         "Framework :: Scrapy",
27 |         "Development Status :: 2 - Pre-Alpha",
28 |         "Environment :: Plugins",
29 |         "Intended Audience :: Developers",
30 |         "License :: OSI Approved :: BSD License",
31 |         "Operating System :: OS Independent",
32 |         "Programming Language :: Python",
33 |         "Programming Language :: Python :: 2",
34 |         "Programming Language :: Python :: 2.7",
35 |         "Programming Language :: Python :: 3",
36 |         "Programming Language :: Python :: 3.4",
37 |         "Programming Language :: Python :: 3.5",
38 |         "Programming Language :: Python :: 3.6",
39 |         "Programming Language :: Python :: Implementation :: CPython",
40 |         "Programming Language :: Python :: Implementation :: PyPy",
41 |         "Topic :: Internet :: WWW/HTTP",
42 |         "Topic :: Software Development :: Libraries :: Application Frameworks",
43 |         "Topic :: Software Development :: Libraries :: Python Modules",
44 |     ],
45 |     python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*",
46 |     install_requires=["hiredis", "pymongo", "redis", "scrapy", "ujson"],
47 |     extras_require=extras_require,
48 | )
49 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scrapedia/scrapy-cookies/342eaada3b84db4971be09862c34db9f207c0fb7/tests/__init__.py


--------------------------------------------------------------------------------
/tests/requirements.txt:
--------------------------------------------------------------------------------
1 | pytest
2 | pytest-benchmark
3 | pytest-cov
4 | pytest-docker-compose
5 | pytest-sugar
6 | pytest-twisted
7 | pytest-xdist
8 | testfixtures
9 | 


--------------------------------------------------------------------------------
/tests/test_downloadermiddleware_cookies.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | from unittest import TestCase
  4 | 
  5 | from scrapy.crawler import Crawler
  6 | from scrapy.exceptions import NotConfigured
  7 | from scrapy.http import Request, Response
  8 | from scrapy.settings import Settings
  9 | from scrapy.spiders import Spider
 10 | from scrapy.utils.test import get_crawler
 11 | from testfixtures import LogCapture
 12 | 
 13 | from scrapy_cookies.downloadermiddlewares.cookies import CookiesMiddleware
 14 | from scrapy_cookies.settings import default_settings
 15 | 
 16 | 
 17 | class CookiesMiddlewareTest(TestCase):
 18 |     def assertCookieValEqual(self, first, second, msg=None):
 19 |         cookievaleq = lambda cv: re.split(";\s*", cv.decode("latin1"))
 20 |         return self.assertEqual(
 21 |             sorted(cookievaleq(first)), sorted(cookievaleq(second)), msg
 22 |         )
 23 | 
 24 |     def setUp(self):
 25 |         self.spider = Spider("foo")
 26 |         settings = Settings()
 27 |         settings.setmodule(default_settings)
 28 |         self.crawler = Crawler(Spider, settings)
 29 |         self.mw = CookiesMiddleware.from_crawler(self.crawler)
 30 |         self.mw.spider_opened(self.spider)
 31 | 
 32 |     def tearDown(self):
 33 |         self.mw.spider_closed(self.spider)
 34 |         del self.mw
 35 | 
 36 |     def test_basic(self):
 37 |         req = Request("http://scrapytest.org/")
 38 |         assert self.mw.process_request(req, self.spider) is None
 39 |         assert "Cookie" not in req.headers
 40 | 
 41 |         headers = {"Set-Cookie": "C1=value1; path=/"}
 42 |         res = Response("http://scrapytest.org/", headers=headers)
 43 |         assert self.mw.process_response(req, res, self.spider) is res
 44 | 
 45 |         req2 = Request("http://scrapytest.org/sub1/")
 46 |         assert self.mw.process_request(req2, self.spider) is None
 47 |         self.assertEqual(req2.headers.get("Cookie"), b"C1=value1")
 48 | 
 49 |     def test_setting_false_cookies_enabled(self):
 50 |         self.assertRaises(
 51 |             NotConfigured,
 52 |             CookiesMiddleware.from_crawler,
 53 |             get_crawler(settings_dict={"COOKIES_ENABLED": False}),
 54 |         )
 55 | 
 56 |     def test_setting_default_cookies_enabled(self):
 57 |         self.assertIsInstance(
 58 |             CookiesMiddleware.from_crawler(get_crawler()), CookiesMiddleware
 59 |         )
 60 | 
 61 |     def test_setting_true_cookies_enabled(self):
 62 |         self.assertIsInstance(
 63 |             CookiesMiddleware.from_crawler(
 64 |                 get_crawler(settings_dict={"COOKIES_ENABLED": True})
 65 |             ),
 66 |             CookiesMiddleware,
 67 |         )
 68 | 
 69 |     def test_setting_enabled_cookies_debug(self):
 70 |         crawler = get_crawler(settings_dict={"COOKIES_DEBUG": True})
 71 |         mw = CookiesMiddleware.from_crawler(crawler)
 72 |         mw.spider_opened(self.spider)
 73 |         with LogCapture(
 74 |             "scrapy_cookies.downloadermiddlewares.cookies",
 75 |             propagate=False,
 76 |             level=logging.DEBUG,
 77 |         ) as l:
 78 |             req = Request("http://scrapytest.org/")
 79 |             res = Response(
 80 |                 "http://scrapytest.org/", headers={"Set-Cookie": "C1=value1; path=/"}
 81 |             )
 82 |             mw.process_response(req, res, crawler.spider)
 83 |             req2 = Request("http://scrapytest.org/sub1/")
 84 |             mw.process_request(req2, crawler.spider)
 85 | 
 86 |             l.check(
 87 |                 (
 88 |                     "scrapy_cookies.downloadermiddlewares.cookies",
 89 |                     "DEBUG",
 90 |                     "Received cookies from: <200 http://scrapytest.org/>\n"
 91 |                     "Set-Cookie: C1=value1; path=/\n",
 92 |                 ),
 93 |                 (
 94 |                     "scrapy_cookies.downloadermiddlewares.cookies",
 95 |                     "DEBUG",
 96 |                     "Sending cookies to: <GET http://scrapytest.org/sub1/>\n"
 97 |                     "Cookie: C1=value1\n",
 98 |                 ),
 99 |             )
100 | 
101 |     def test_setting_disabled_cookies_debug(self):
102 |         crawler = get_crawler(settings_dict={"COOKIES_DEBUG": False})
103 |         mw = CookiesMiddleware.from_crawler(crawler)
104 |         mw.spider_opened(self.spider)
105 |         with LogCapture(
106 |             "scrapy_cookies.downloadermiddlewares.cookies",
107 |             propagate=False,
108 |             level=logging.DEBUG,
109 |         ) as l:
110 |             req = Request("http://scrapytest.org/")
111 |             res = Response(
112 |                 "http://scrapytest.org/", headers={"Set-Cookie": "C1=value1; path=/"}
113 |             )
114 |             mw.process_response(req, res, crawler.spider)
115 |             req2 = Request("http://scrapytest.org/sub1/")
116 |             mw.process_request(req2, crawler.spider)
117 | 
118 |             l.check()
119 | 
120 |     def test_do_not_break_on_non_utf8_header(self):
121 |         req = Request("http://scrapytest.org/")
122 |         assert self.mw.process_request(req, self.spider) is None
123 |         assert "Cookie" not in req.headers
124 | 
125 |         headers = {"Set-Cookie": b"C1=in\xa3valid; path=/", "Other": b"ignore\xa3me"}
126 |         res = Response("http://scrapytest.org/", headers=headers)
127 |         assert self.mw.process_response(req, res, self.spider) is res
128 | 
129 |         req2 = Request("http://scrapytest.org/sub1/")
130 |         assert self.mw.process_request(req2, self.spider) is None
131 |         self.assertIn("Cookie", req2.headers)
132 | 
133 |     def test_dont_merge_cookies(self):
134 |         # merge some cookies into jar
135 |         headers = {"Set-Cookie": "C1=value1; path=/"}
136 |         req = Request("http://scrapytest.org/")
137 |         res = Response("http://scrapytest.org/", headers=headers)
138 |         assert self.mw.process_response(req, res, self.spider) is res
139 | 
140 |         # test Cookie header is not seted to request
141 |         req = Request("http://scrapytest.org/dontmerge", meta={"dont_merge_cookies": 1})
142 |         assert self.mw.process_request(req, self.spider) is None
143 |         assert "Cookie" not in req.headers
144 | 
145 |         # check that returned cookies are not merged back to jar
146 |         res = Response(
147 |             "http://scrapytest.org/dontmerge",
148 |             headers={"Set-Cookie": "dont=mergeme; path=/"},
149 |         )
150 |         assert self.mw.process_response(req, res, self.spider) is res
151 | 
152 |         # check that cookies are merged back
153 |         req = Request("http://scrapytest.org/mergeme")
154 |         assert self.mw.process_request(req, self.spider) is None
155 |         self.assertEqual(req.headers.get("Cookie"), b"C1=value1")
156 | 
157 |         # check that cookies are merged when dont_merge_cookies is passed as 0
158 |         req = Request("http://scrapytest.org/mergeme", meta={"dont_merge_cookies": 0})
159 |         assert self.mw.process_request(req, self.spider) is None
160 |         self.assertEqual(req.headers.get("Cookie"), b"C1=value1")
161 | 
162 |     def test_complex_cookies(self):
163 |         # merge some cookies into jar
164 |         cookies = [
165 |             {
166 |                 "name": "C1",
167 |                 "value": "value1",
168 |                 "path": "/foo",
169 |                 "domain": "scrapytest.org",
170 |             },
171 |             {
172 |                 "name": "C2",
173 |                 "value": "value2",
174 |                 "path": "/bar",
175 |                 "domain": "scrapytest.org",
176 |             },
177 |             {
178 |                 "name": "C3",
179 |                 "value": "value3",
180 |                 "path": "/foo",
181 |                 "domain": "scrapytest.org",
182 |             },
183 |             {"name": "C4", "value": "value4", "path": "/foo", "domain": "scrapy.org"},
184 |         ]
185 | 
186 |         req = Request("http://scrapytest.org/", cookies=cookies)
187 |         self.mw.process_request(req, self.spider)
188 | 
189 |         # embed C1 and C3 for scrapytest.org/foo
190 |         req = Request("http://scrapytest.org/foo")
191 |         self.mw.process_request(req, self.spider)
192 |         assert req.headers.get("Cookie") in (
193 |             b"C1=value1; C3=value3",
194 |             b"C3=value3; C1=value1",
195 |         )
196 | 
197 |         # embed C2 for scrapytest.org/bar
198 |         req = Request("http://scrapytest.org/bar")
199 |         self.mw.process_request(req, self.spider)
200 |         self.assertEqual(req.headers.get("Cookie"), b"C2=value2")
201 | 
202 |         # embed nothing for scrapytest.org/baz
203 |         req = Request("http://scrapytest.org/baz")
204 |         self.mw.process_request(req, self.spider)
205 |         assert "Cookie" not in req.headers
206 | 
207 |     def test_merge_request_cookies(self):
208 |         req = Request("http://scrapytest.org/", cookies={"galleta": "salada"})
209 |         assert self.mw.process_request(req, self.spider) is None
210 |         self.assertEqual(req.headers.get("Cookie"), b"galleta=salada")
211 | 
212 |         headers = {"Set-Cookie": "C1=value1; path=/"}
213 |         res = Response("http://scrapytest.org/", headers=headers)
214 |         assert self.mw.process_response(req, res, self.spider) is res
215 | 
216 |         req2 = Request("http://scrapytest.org/sub1/")
217 |         assert self.mw.process_request(req2, self.spider) is None
218 | 
219 |         self.assertCookieValEqual(
220 |             req2.headers.get("Cookie"), b"C1=value1; galleta=salada"
221 |         )
222 | 
223 |     def test_cookiejar_key(self):
224 |         req = Request(
225 |             "http://scrapytest.org/",
226 |             cookies={"galleta": "salada"},
227 |             meta={"cookiejar": "store1"},
228 |         )
229 |         assert self.mw.process_request(req, self.spider) is None
230 |         self.assertEqual(req.headers.get("Cookie"), b"galleta=salada")
231 | 
232 |         headers = {"Set-Cookie": "C1=value1; path=/"}
233 |         res = Response("http://scrapytest.org/", headers=headers, request=req)
234 |         assert self.mw.process_response(req, res, self.spider) is res
235 | 
236 |         req2 = Request("http://scrapytest.org/", meta=res.meta)
237 |         assert self.mw.process_request(req2, self.spider) is None
238 |         self.assertCookieValEqual(
239 |             req2.headers.get("Cookie"), b"C1=value1; galleta=salada"
240 |         )
241 | 
242 |         req3 = Request(
243 |             "http://scrapytest.org/",
244 |             cookies={"galleta": "dulce"},
245 |             meta={"cookiejar": "store2"},
246 |         )
247 |         assert self.mw.process_request(req3, self.spider) is None
248 |         self.assertEqual(req3.headers.get("Cookie"), b"galleta=dulce")
249 | 
250 |         headers = {"Set-Cookie": "C2=value2; path=/"}
251 |         res2 = Response("http://scrapytest.org/", headers=headers, request=req3)
252 |         assert self.mw.process_response(req3, res2, self.spider) is res2
253 | 
254 |         req4 = Request("http://scrapytest.org/", meta=res2.meta)
255 |         assert self.mw.process_request(req4, self.spider) is None
256 |         self.assertCookieValEqual(
257 |             req4.headers.get("Cookie"), b"C2=value2; galleta=dulce"
258 |         )
259 | 
260 |         # cookies from hosts with port
261 |         req5_1 = Request("http://scrapytest.org:1104/")
262 |         assert self.mw.process_request(req5_1, self.spider) is None
263 | 
264 |         headers = {"Set-Cookie": "C1=value1; path=/"}
265 |         res5_1 = Response(
266 |             "http://scrapytest.org:1104/", headers=headers, request=req5_1
267 |         )
268 |         assert self.mw.process_response(req5_1, res5_1, self.spider) is res5_1
269 | 
270 |         req5_2 = Request("http://scrapytest.org:1104/some-redirected-path")
271 |         assert self.mw.process_request(req5_2, self.spider) is None
272 |         self.assertEqual(req5_2.headers.get("Cookie"), b"C1=value1")
273 | 
274 |         req5_3 = Request("http://scrapytest.org/some-redirected-path")
275 |         assert self.mw.process_request(req5_3, self.spider) is None
276 |         self.assertEqual(req5_3.headers.get("Cookie"), b"C1=value1")
277 | 
278 |         # skip cookie retrieval for not http request
279 |         req6 = Request("file:///scrapy/sometempfile")
280 |         assert self.mw.process_request(req6, self.spider) is None
281 |         self.assertEqual(req6.headers.get("Cookie"), None)
282 | 
283 |     def test_local_domain(self):
284 |         request = Request("http://example-host/", cookies={"currencyCookie": "USD"})
285 |         assert self.mw.process_request(request, self.spider) is None
286 |         self.assertIn("Cookie", request.headers)
287 |         self.assertEqual(b"currencyCookie=USD", request.headers["Cookie"])
288 | 


--------------------------------------------------------------------------------
/tests/test_storages/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scrapedia/scrapy-cookies/342eaada3b84db4971be09862c34db9f207c0fb7/tests/test_storages/__init__.py


--------------------------------------------------------------------------------
/tests/test_storages/confest.py:
--------------------------------------------------------------------------------
1 | pytest_plugins = ["docker_compose"]
2 | 


--------------------------------------------------------------------------------
/tests/test_storages/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | services:
 3 |   mongo:
 4 |     container_name: dc-pytest-scrapy-cookies-mongo
 5 |     image: mongo:latest
 6 |     networks:
 7 |       - pytest_scrapy_cookies
 8 |     ports:
 9 |       - "127.0.0.1:27017:27017"
10 |     restart: always
11 |     tty: true
12 |   redis:
13 |     container_name: dc-pytest-scrapy-cookies-redis
14 |     image: redis:latest
15 |     networks:
16 |       - pytest_scrapy_cookies
17 |     ports:
18 |       - "127.0.0.1:6379:6379"
19 |     restart: always
20 |     tty: true
21 | 
22 | networks:
23 |   pytest_scrapy_cookies:
24 |     driver: bridge
25 | 


--------------------------------------------------------------------------------
/tests/test_storages/test_storage_in_memory.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | from copy import deepcopy
 4 | from unittest import TestCase
 5 | 
 6 | from scrapy import Spider
 7 | from scrapy.http.cookies import CookieJar
 8 | from scrapy.settings import Settings
 9 | 
10 | from scrapy_cookies.settings import default_settings
11 | from scrapy_cookies.storage.in_memory import InMemoryStorage
12 | 
13 | 
14 | class StorageTest(TestCase):
15 |     def setUp(self):
16 |         self.spider = Spider("foo")
17 |         self.settings = Settings()
18 |         self.settings.setmodule(default_settings)
19 | 
20 |     def tearDown(self):
21 |         pass
22 | 
23 |     def test_in_memory(self):
24 |         tmpdir = tempfile.mkdtemp()
25 |         local_settings = {
26 |             "COOKIES_PERSISTENCE": True,
27 |             "COOKIES_PERSISTENCE_DIR": tmpdir + "/cookies",
28 |         }
29 |         settings = deepcopy(self.settings)
30 |         settings.setdict(local_settings)
31 | 
32 |         storage = InMemoryStorage(settings)
33 |         storage.open_spider(self.spider)
34 | 
35 |         cookie = storage["no_key"]
36 |         self.assertIsInstance(cookie, CookieJar)
37 |         self.assertDictEqual(cookie._cookies, CookieJar()._cookies)
38 | 
39 |         storage["key_1"] = CookieJar()
40 |         self.assertIn("key_1", storage)
41 |         self.assertEqual(storage["key_1"]._cookies, CookieJar()._cookies)
42 | 
43 |         storage.close_spider(self.spider)
44 |         self.assertTrue(os.path.isfile(tmpdir + "/cookies"))
45 | 


--------------------------------------------------------------------------------
/tests/test_storages/test_storage_mongo.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | from collections.abc import Iterable
 3 | from unittest import TestCase
 4 | 
 5 | from pytest import mark
 6 | from scrapy import Spider
 7 | from scrapy.http.cookies import CookieJar
 8 | from scrapy.settings import Settings
 9 | 
10 | from scrapy_cookies.settings import default_settings
11 | from scrapy_cookies.storage.mongo import MongoStorage
12 | 
13 | 
14 | @mark.usefixtures("class_scoped_container_getter")
15 | class MongoStorageTest(TestCase):
16 |     local_settings = {
17 |         "COOKIES_STORAGE": "scrapy_cookies.storage.mongo.MongoStorage",
18 |         "COOKIES_MONGO_MONGOCLIENT_HOST": "localhost",
19 |         "COOKIES_MONGO_MONGOCLIENT_PORT": 27017,
20 |         "COOKIES_MONGO_MONGOCLIENT_DOCUMENT_CLASS": dict,
21 |         "COOKIES_MONGO_MONGOCLIENT_TZ_AWARE": False,
22 |         "COOKIES_MONGO_MONGOCLIENT_CONNECT": True,
23 |         "COOKIES_MONGO_MONGOCLIENT_KWARGS": {},
24 |         "COOKIES_MONGO_DATABASE": "cookies",
25 |         "COOKIES_MONGO_COLLECTION": "cookies",
26 |     }
27 | 
28 |     def setUp(self):
29 |         self.spider = Spider("foo")
30 |         self.settings = Settings()
31 |         self.settings.setmodule(default_settings)
32 |         self.settings.setdict(self.local_settings)
33 |         self.storage = MongoStorage(self.settings)
34 |         self.storage.open_spider(self.spider)
35 | 
36 |     def tearDown(self):
37 |         self.storage.close_spider(self.spider)
38 |         self.storage.coll.delete_many({})
39 | 
40 |     def test_getitem(self):
41 |         cookies = CookieJar()
42 |         self.storage.coll.insert_one(
43 |             {
44 |                 "key": "new_cookies",
45 |                 "cookiejar": pickle.dumps(cookies),
46 |                 "cookies": cookies._cookies,
47 |             }
48 |         )
49 | 
50 |         self.assertDictEqual(self.storage["new_cookies"]._cookies, cookies._cookies)
51 | 
52 |     def test_missing(self):
53 |         self.assertDictEqual(
54 |             self.storage["no_exist_cookies"]._cookies, CookieJar()._cookies
55 |         )
56 | 
57 |     def test_setitem(self):
58 |         cookies = CookieJar()
59 |         self.storage["new_cookies"] = cookies
60 |         self.assertDictEqual(
61 |             self.storage.coll.find_one({"key": "new_cookies"}, {"_id": 0}),
62 |             {
63 |                 "key": "new_cookies",
64 |                 "cookiejar": pickle.dumps(cookies),
65 |                 "cookies": cookies._cookies,
66 |             },
67 |         )
68 | 
69 |     def test_iter(self):
70 |         self.assertIsInstance(self.storage, Iterable)
71 | 


--------------------------------------------------------------------------------
/tests/test_storages/test_storage_redis.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | from collections.abc import Iterable
 3 | from unittest import TestCase
 4 | 
 5 | import ujson
 6 | from pytest import mark
 7 | from scrapy import Spider
 8 | from scrapy.http.cookies import CookieJar
 9 | from scrapy.settings import Settings
10 | 
11 | from scrapy_cookies.settings import default_settings
12 | from scrapy_cookies.storage.redis_ import RedisStorage
13 | 
14 | 
15 | @mark.usefixtures("class_scoped_container_getter")
16 | class RedisStorageTest(TestCase):
17 |     maxDiff = None
18 |     local_settings = {}
19 | 
20 |     def setUp(self):
21 |         self.spider = Spider("foo")
22 |         self.settings = Settings()
23 |         self.settings.setmodule(default_settings)
24 |         self.settings.setdict(self.local_settings)
25 |         self.storage = RedisStorage(self.settings)
26 |         self.storage.open_spider(self.spider)
27 | 
28 |     def tearDown(self):
29 |         self.storage.close_spider(self.spider)
30 |         self.storage.r.flushall()
31 | 
32 |     def test_getitem(self):
33 |         cookies = CookieJar()
34 |         self.storage.r.hmset(
35 |             "new_cookies",
36 |             {
37 |                 "cookiejar": pickle.dumps(cookies),
38 |                 "cookies": ujson.dumps(cookies._cookies),
39 |             },
40 |         )
41 |         self.assertDictEqual(self.storage["new_cookies"]._cookies, cookies._cookies)
42 | 
43 |     def test_missing(self):
44 |         self.assertDictEqual(
45 |             self.storage["no_exist_cookies"]._cookies, CookieJar()._cookies
46 |         )
47 | 
48 |     def test_setitem(self):
49 |         cookies = CookieJar()
50 |         self.storage["new_cookies"] = cookies
51 |         _ = self.storage.r.hgetall("new_cookies")
52 |         self.assertDictEqual(
53 |             pickle.loads(self.storage.r.hgetall("new_cookies")[b"cookiejar"])._cookies,
54 |             cookies._cookies,
55 |         )
56 |         self.assertDictEqual(
57 |             self.storage.r.hgetall("new_cookies"),
58 |             {
59 |                 b"cookiejar": pickle.dumps(cookies),
60 |                 b"cookies": ujson.dumps(cookies._cookies).encode(),
61 |             },
62 |         )
63 | 
64 |     def test_iter(self):
65 |         self.assertIsInstance(self.storage, Iterable)
66 | 
67 |     def test_len(self):
68 |         self.assertEqual(len(self.storage), 0)
69 |         self.storage["new_cookies_1"] = CookieJar()
70 |         self.assertEqual(len(self.storage), 1)
71 |         self.storage["new_cookies_2"] = CookieJar()
72 |         self.assertEqual(len(self.storage), 2)
73 | 
74 |     def test_delitem(self):
75 |         self.storage["new_cookies"] = CookieJar()
76 |         del self.storage["new_cookies"]
77 |         self.assertFalse(self.storage.r.hgetall("new_cookies"))
78 | 


--------------------------------------------------------------------------------
/tests/test_storages/test_storage_sqlite.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | from copy import deepcopy
 4 | from unittest import TestCase
 5 | 
 6 | from scrapy import Spider
 7 | from scrapy.http.cookies import CookieJar
 8 | from scrapy.settings import Settings
 9 | 
10 | from scrapy_cookies.settings import default_settings
11 | from scrapy_cookies.storage.sqlite import SQLiteStorage
12 | 
13 | 
14 | class StorageTest(TestCase):
15 |     def setUp(self):
16 |         self.spider = Spider("foo")
17 |         self.settings = Settings()
18 |         self.settings.setmodule(default_settings)
19 | 
20 |     def tearDown(self):
21 |         pass
22 | 
23 |     def test_sqlite(self):
24 |         tmpdir = tempfile.mkdtemp()
25 |         local_settings = {
26 |             "COOKIES_STORAGE": "scrapy_cookies.storage.sqlite.SQLiteStorage",
27 |             "COOKIES_SQLITE_DATABASE": ":memory:",
28 |             "COOKIES_PERSISTENCE": True,
29 |             "COOKIES_PERSISTENCE_DIR": tmpdir + "/cookies",
30 |         }
31 |         settings = deepcopy(self.settings)
32 |         settings.setdict(local_settings)
33 | 
34 |         storage = SQLiteStorage(settings)
35 |         storage.open_spider(self.spider)
36 | 
37 |         cookie = storage["no_key"]
38 |         self.assertIn("no_key", storage)
39 |         self.assertIsInstance(cookie, CookieJar)
40 |         self.assertEqual(cookie._cookies, CookieJar()._cookies)
41 | 
42 |         storage["key_1"] = CookieJar()
43 |         self.assertIn("key_1", storage)
44 |         self.assertEqual(storage["key_1"]._cookies, CookieJar()._cookies)
45 | 
46 |         self.assertNotIn("key_2", storage)
47 | 
48 |         self.assertEqual(len(storage), 2)
49 | 
50 |         _dict = {"no_key": CookieJar()._cookies, "key_1": CookieJar()._cookies}
51 |         for k, v in storage:
52 |             self.assertDictEqual(v._cookies, _dict[k])
53 | 
54 |         storage.close_spider(self.spider)
55 |         self.assertTrue(os.path.isfile(tmpdir + "/cookies"))
56 | 
57 |         storage_2 = SQLiteStorage(settings)
58 |         storage_2.open_spider(self.spider)
59 |         self.assertIn("key_1", storage_2)
60 |         self.assertDictEqual(storage_2["key_1"]._cookies, CookieJar()._cookies)
61 | 
62 |         storage_2.close_spider(self.spider)
63 |         self.assertTrue(os.path.isfile(tmpdir + "/cookies"))
64 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # Tox (https://tox.readthedocs.io/) is a tool for running tests
 2 | # in multiple virtualenvs. This configuration file will run the
 3 | # test suite on all supported python versions. To use it, "pip install tox"
 4 | # and then run "tox" from this directory.
 5 | 
 6 | [tox]
 7 | envlist = py36,py37
 8 | 
 9 | [testenv]
10 | commands =
11 |     pytest
12 | deps =
13 |     -r requirements.txt
14 |     -r tests/requirements.txt
15 | passenv =
16 |     PYTHONPATH
17 | 
18 | [docs]
19 | changedir = docs
20 | deps =
21 |     -r docs/requirements.txt
22 | 
23 | [testenv:docs]
24 | changedir = {[docs]changedir}
25 | deps = {[docs]deps}
26 | commands =
27 |     sphinx-build -W -b html . {envtmpdir}/html
28 | 
29 | [testenv:docs-links]
30 | changedir = {[docs]changedir}
31 | deps = {[docs]deps}
32 | commands =
33 |     sphinx-build -W -b linkcheck . {envtmpdir}/linkcheck
34 | 


--------------------------------------------------------------------------------