├── setup.cfg
├── .bumpversion.cfg
├── dev_requirements.txt
├── .gitignore
├── tox.ini
├── .travis.yml
├── CONTRIBUTORS.rst
├── cachecontrol
├── __init__.py
├── wrapper.py
├── caches
│ ├── __init__.py
│ ├── redis_cache.py
│ └── file_cache.py
├── compat.py
├── cache.py
├── filewrapper.py
├── adapter.py
├── serialize.py
└── controller.py
├── tests
├── test_storage_redis.py
├── test_adapter.py
├── test_storage_filecache.py
├── test_max_age.py
├── test_vary.py
├── test_etag.py
└── test_cache_control.py
├── tasks.py
├── setup.py
├── README.rst
├── examples
└── benchmark.py
├── docs
├── usage.rst
├── index.rst
├── tips.rst
├── storage.rst
├── etags.rst
├── Makefile
└── conf.py
└── conftest.py
/setup.cfg:
--------------------------------------------------------------------------------
1 | [pytest]
2 | norecursedirs = bin lib include build
--------------------------------------------------------------------------------
/.bumpversion.cfg:
--------------------------------------------------------------------------------
1 | [bumpversion]
2 | current_version = 0.9.3
3 | files = setup.py
4 | commit = True
5 | tag = True
6 |
7 |
--------------------------------------------------------------------------------
/dev_requirements.txt:
--------------------------------------------------------------------------------
1 | -e .
2 |
3 | tox
4 | pytest
5 | mock
6 | webtest
7 | sphinx
8 | redis
9 | lockfile
10 | bumpversion
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.pyc
3 | *.pyo
4 | *.egg-info/*
5 | dist
6 | bin
7 | lib
8 | lib64
9 | include
10 | .Python
11 | docs/_build
12 | build/
13 | .tox
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py27, py33
3 |
4 | [testenv]
5 | deps = pytest
6 | mock
7 | webtest
8 | redis
9 | lockfile
10 | commands = py.test
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | env:
4 | - TOXENV=py26
5 | - TOXENV=py27
6 | - TOXENV=py32
7 | - TOXENV=py33
8 |
9 | install: "pip install tox"
10 |
11 | script: tox
12 |
--------------------------------------------------------------------------------
/CONTRIBUTORS.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | Contributors
3 | ==============
4 |
5 | Huge thanks to all those folks who have helped improve CacheControl!
6 |
7 | - Toby White
8 | - Ian Cordasco
9 | - Cory Benfield
10 | - Javier de la Rosa
11 |
--------------------------------------------------------------------------------
/cachecontrol/__init__.py:
--------------------------------------------------------------------------------
1 | """CacheControl import Interface.
2 |
3 | Make it easy to import from cachecontrol without long namespaces.
4 | """
5 | from .wrapper import CacheControl
6 | from .adapter import CacheControlAdapter
7 | from .controller import CacheController
8 |
--------------------------------------------------------------------------------
/tests/test_storage_redis.py:
--------------------------------------------------------------------------------
1 | from datetime import datetime
2 |
3 | from mock import Mock
4 | from cachecontrol.caches import RedisCache
5 |
6 |
class TestRedisCache(object):
    """Unit tests for RedisCache driven through a mocked redis connection."""

    def setup(self):
        self.conn = Mock()
        self.cache = RedisCache(self.conn)

    def test_set_expiration(self):
        # An absolute expiry datetime must be translated into a SETEX call.
        when = datetime(2014, 2, 2)
        self.cache.set('foo', 'bar', expires=when)
        assert self.conn.setex.called
16 |
--------------------------------------------------------------------------------
/cachecontrol/wrapper.py:
--------------------------------------------------------------------------------
1 | from .adapter import CacheControlAdapter
2 | from .cache import DictCache
3 |
4 |
def CacheControl(sess, cache=None, cache_etags=True, serializer=None):
    """Route all http(s) traffic of *sess* through a caching adapter.

    Returns the same session object. When *cache* is not supplied, a
    fresh in-memory DictCache is used.
    """
    if not cache:
        cache = DictCache()
    adapter = CacheControlAdapter(
        cache,
        cache_etags=cache_etags,
        serializer=serializer,
    )
    # One shared adapter handles both schemes.
    for prefix in ('http://', 'https://'):
        sess.mount(prefix, adapter)

    return sess
16 |
--------------------------------------------------------------------------------
/cachecontrol/caches/__init__.py:
--------------------------------------------------------------------------------
from textwrap import dedent

# FileCache depends on the third-party `lockfile` package. Rather than make
# it a hard requirement of the package, warn at import time and carry on
# without FileCache when lockfile is missing.
try:
    from .file_cache import FileCache
except ImportError:
    notice = dedent('''
    NOTE: In order to use the FileCache you must have
    lockfile installed. You can install it via pip:
      pip install lockfile
    ''')
    print(notice)


# RedisCache is only exposed when the `redis` client library is importable.
try:
    import redis
    from .redis_cache import RedisCache
except ImportError:
    pass
19 |
--------------------------------------------------------------------------------
/tasks.py:
--------------------------------------------------------------------------------
1 | from invoke import run, task
2 |
3 |
4 | VENV = 'venv'
5 |
6 |
def env_do(tail, **kw):
    # Run *tail* as a command from the project virtualenv's bin directory.
    return run('%s/bin/%s' % (VENV, tail), **kw)


@task
def virtualenv():
    """Create the development virtualenv."""
    run('virtualenv %s' % VENV)


@task('virtualenv')
def bootstrap():
    """Create the virtualenv and install the dev requirements into it."""
    env_do('pip install -r dev_requirements.txt')
19 |
20 |
@task
def clean_env():
    """Remove the development virtualenv.

    Uses the VENV constant (was a hard-coded 'venv') so this stays in
    sync with the `virtualenv` task if the directory name changes.
    """
    run('rm -r %s' % VENV)
24 |
25 |
@task
def test_all():
    """Run the full test matrix via tox (installed in the virtualenv)."""
    env_do('tox')


@task
def docs():
    """Build the Sphinx HTML documentation."""
    run('cd docs && make html')


@task
def release(part):
    """Bump the version *part* (e.g. major/minor/patch), push, and upload.

    Relies on the commit/tag settings in .bumpversion.cfg, so the bump
    produces a commit and tag that are then pushed before the sdist upload.
    """
    env_do('bumpversion %s' % part)
    run('git push origin master')
    run('git push --tags origin master')
    env_do('python setup.py sdist upload')
42 |
--------------------------------------------------------------------------------
/cachecontrol/compat.py:
--------------------------------------------------------------------------------
# Python 3 moved urljoin into urllib.parse; fall back to the Python 2 name.
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin


# email.utils (lowercase) is the modern module name; some very old
# Python 2 releases only provide email.Utils.
try:
    import email.utils
    parsedate_tz = email.utils.parsedate_tz
except ImportError:
    import email.Utils
    parsedate_tz = email.Utils.parsedate_tz


# Prefer the C-accelerated pickle on Python 2; Python 3's pickle already is.
try:
    import cPickle as pickle
except ImportError:
    import pickle


# Handle the case where the requests has been patched to not have urllib3
# bundled as part of its source; fall back to the standalone urllib3.
try:
    from requests.packages.urllib3.response import HTTPResponse
except ImportError:
    from urllib3.response import HTTPResponse

try:
    from requests.packages.urllib3.util import is_fp_closed
except ImportError:
    from urllib3.util import is_fp_closed
--------------------------------------------------------------------------------
/cachecontrol/cache.py:
--------------------------------------------------------------------------------
1 | """
2 | The cache object API for implementing caches. The default is just a
3 | dictionary, which in turns means it is not threadsafe for writing.
4 | """
5 | from threading import Lock
6 |
7 |
class BaseCache(object):
    """Abstract interface every cache backend must implement.

    Fix: the original methods did `raise NotImplemented()`. NotImplemented
    is a constant (meant for rich-comparison methods) and is not callable
    or raisable, so calling these produced a confusing TypeError instead
    of the intended NotImplementedError.
    """

    def get(self, key):
        """Return the cached value for *key*, or None when absent."""
        raise NotImplementedError()

    def set(self, key, value):
        """Store *value* under *key*."""
        raise NotImplementedError()

    def delete(self, key):
        """Remove *key* from the cache if present."""
        raise NotImplementedError()
18 |
19 |
class DictCache(BaseCache):
    """In-memory cache backed by a plain dict; writes go through a lock."""

    def __init__(self, init_dict=None):
        self.lock = Lock()
        self.data = init_dict or {}

    def get(self, key):
        # Reads are intentionally lock-free; only mutation is serialized.
        return self.data.get(key, None)

    def set(self, key, value):
        with self.lock:
            self.data[key] = value

    def delete(self, key):
        with self.lock:
            # Drop the key if present; a missing key is not an error.
            self.data.pop(key, None)
37 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import setuptools

# Read the long description up front so PyPI renders the README.
# Fix: use a context manager so the file handle is closed deterministically
# (the original `open(...).read()` leaked the handle).
with open('README.rst') as readme:
    long_description = readme.read()

# Kept in sync with .bumpversion.cfg by the `release` task.
VERSION = '0.9.3'

setup_params = dict(
    name='CacheControl',
    version=VERSION,
    author='Eric Larson',
    author_email='eric@ionrock.org',
    license='MIT',
    url='https://github.com/ionrock/cachecontrol',
    keywords='requests http caching web',
    packages=setuptools.find_packages(),
    description='httplib2 caching for requests',
    long_description=long_description,
    install_requires=[
        'requests',
    ],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Web Environment',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Topic :: Internet :: WWW/HTTP',
    ],
)


if __name__ == '__main__':
    setuptools.setup(**setup_params)
34 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | CacheControl
3 | ==============
4 |
5 | CacheControl is a port of the caching algorithms in httplib2_ for use with
6 | requests_ session object.
7 |
8 | It was written because httplib2's better support for caching is often
9 | mitigated by its lack of threadsafety. The same is true of requests in
10 | terms of caching.
11 |
12 |
13 | Quickstart
14 | ==========
15 |
16 | ::
17 |
18 | import requests
19 |
20 | from cachecontrol import CacheControl
21 |
22 |
23 | sess = requests.session()
24 | cached_sess = CacheControl(sess)
25 |
26 | response = cached_sess.get('http://google.com')
27 |
28 | If the URL contains any caching based headers, it will cache the
29 | result in a simple dictionary.
30 |
31 | For more info, check out the docs_
32 |
33 | .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master
34 | :target: https://travis-ci.org/ionrock/cachecontrol
35 |
36 | .. _docs: http://cachecontrol.readthedocs.org/en/latest/
37 | .. _httplib2: http://code.google.com/p/httplib2/
38 | .. _requests: http://docs.python-requests.org/
39 |
--------------------------------------------------------------------------------
/cachecontrol/filewrapper.py:
--------------------------------------------------------------------------------
1 | from .compat import is_fp_closed
2 |
3 |
class CallbackFileWrapper(object):
    """
    Small wrapper around a fp object which will tee everything read into a
    buffer, and when that file is closed it will execute a callback with the
    contents of that buffer.

    All attributes are proxied to the underlying file object.

    This class uses members with a double underscore (__) leading prefix so as
    not to accidentally shadow an attribute.
    """

    def __init__(self, fp, callback):
        # fp: the underlying file-like object whose reads are teed
        # callback: invoked with the accumulated bytes once fp is detected
        #           closed (see read() below)
        self.__buf = b""
        self.__fp = fp
        self.__callback = callback

    def __getattr__(self, name):
        # Only invoked for attributes NOT found on the wrapper itself, so
        # every unknown lookup falls through to the wrapped fp.
        return getattr(self.__fp, name)

    def read(self, amt=None):
        # Tee the data into our buffer while returning it to the caller.
        data = self.__fp.read(amt)
        self.__buf += data

        # Is this the best way to figure out if the file has been completely
        # consumed?
        # NOTE(review): if read() is called again after the fp is closed,
        # the callback fires again with the same buffer — confirm callers
        # tolerate that.
        if is_fp_closed(self.__fp):
            self.__callback(self.__buf)

        return data
34 |
--------------------------------------------------------------------------------
/cachecontrol/caches/redis_cache.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | from datetime import datetime
4 |
5 |
def total_seconds(td):
    """Return the timedelta *td* expressed as seconds (a float).

    Python 2.6 compatibility shim: falls back to manual arithmetic when
    timedelta.total_seconds() is unavailable.
    """
    try:
        return td.total_seconds()
    except AttributeError:
        whole_seconds = td.seconds + td.days * 24 * 3600
        return (td.microseconds + whole_seconds * 10**6) / 10**6
14 |
15 |
class RedisCache(object):
    """Cache backend that stores entries in a redis connection."""

    def __init__(self, conn):
        # conn: a redis client exposing get/set/setex/delete/keys.
        self.conn = conn

    def get(self, key):
        return self.conn.get(key)

    def set(self, key, value, expires=None):
        """Store *value*; a datetime *expires* becomes a relative TTL via SETEX."""
        if expires:
            ttl = expires - datetime.now()
            self.conn.setex(key, total_seconds(ttl), value)
        else:
            self.conn.set(key, value)

    def delete(self, key):
        self.conn.delete(key)

    def clear(self):
        """Helper for clearing all the keys in a database. Use with
        caution!"""
        for key in self.conn.keys():
            self.conn.delete(key)
39 |
--------------------------------------------------------------------------------
/tests/test_adapter.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from requests import Session
4 | from cachecontrol.adapter import CacheControlAdapter
5 | from cachecontrol.wrapper import CacheControl
6 |
7 |
def use_wrapper():
    """Build a cached session via the CacheControl wrapper helper."""
    print('Using helper')
    return CacheControl(Session())


def use_adapter():
    """Build a cached session by mounting the adapter manually."""
    print('Using adapter')
    session = Session()
    session.mount('http://', CacheControlAdapter())
    return session


@pytest.fixture(params=[use_adapter, use_wrapper])
def sess(url, request):
    # Parametrized over both construction styles; one priming GET ensures
    # each test starts with the URL already cached.
    session = request.param()
    session.get(url)
    return session
26 |
27 |
class TestSessionActions(object):
    """Cache hits and invalidation behavior on a primed session."""

    def test_get_caches(self, url, sess):
        # The fixture already fetched url once, so this GET is a cache hit.
        resp = sess.get(url)
        assert resp.from_cache is True

    def test_get_with_no_cache_does_not_cache(self, url, sess):
        resp = sess.get(url, headers={'Cache-Control': 'no-cache'})
        assert not resp.from_cache

    def test_put_invalidates_cache(self, url, sess):
        resp = sess.put(url, data={'foo': 'bar'})
        sess.get(url)
        assert not resp.from_cache

    def test_delete_invalidates_cache(self, url, sess):
        resp = sess.delete(url)
        sess.get(url)
        assert not resp.from_cache
47 |
--------------------------------------------------------------------------------
/examples/benchmark.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import requests
3 | import argparse
4 |
5 | from multiprocessing import Process
6 | from datetime import datetime
7 | from wsgiref.simple_server import make_server
8 | from cachecontrol import CacheControl
9 |
10 | HOST = 'localhost'
11 | PORT = 8050
12 | URL = 'http://{0}:{1}/'.format(HOST, PORT)
13 |
14 |
class Server(object):
    """Minimal WSGI app serving a cacheable plain-text response."""

    def __call__(self, env, sr):
        # Fix: WSGI (PEP 3333) requires the body to be an iterable of
        # *bytes*; the original returned a str, which breaks under
        # Python 3's wsgiref (and iterated char-by-char on Python 2).
        body = b'Hello World!'
        status = '200 OK'
        headers = [
            # Cache for ten minutes so the benchmark exercises cache hits.
            ('Cache-Control', 'max-age=%i' % (60 * 10)),
            ('Content-Type', 'text/plain'),
        ]
        sr(status, headers)
        return [body]
26 |
27 |
def start_server():
    # Serve forever on HOST:PORT; intended to run in a child Process
    # (see run_benchmark), which terminates it when the benchmark ends.
    httpd = make_server(HOST, PORT, Server())
    httpd.serve_forever()
31 |
32 |
def run_benchmark(sess):
    """Time 1000 sequential GETs against a local server using *sess*."""
    proc = Process(target=start_server)
    proc.start()

    start = datetime.now()
    # Fix: was `xrange(0, 1000)`, which does not exist on Python 3.
    for _ in range(1000):
        sess.get(URL)
        sys.stdout.write('.')
    end = datetime.now()
    # Fix: bare `print()` prints "()" on Python 2 without the future
    # import; writing the newline directly behaves the same everywhere.
    sys.stdout.write('\n')

    total = end - start
    print('Total time for 1000 requests: %s' % total)
    proc.terminate()
47 |
48 |
def run():
    """Entry point: benchmark request throughput with or without caching."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--no-cache',
                        default=False,
                        action='store_true',
                        help='Do not use cachecontrol')
    options = parser.parse_args()

    session = requests.Session()
    # Wrap the session in the caching adapter unless explicitly disabled.
    if not options.no_cache:
        session = CacheControl(session)

    run_benchmark(session)
62 |
63 |
64 | if __name__ == '__main__':
65 | run()
66 |
--------------------------------------------------------------------------------
/tests/test_storage_filecache.py:
--------------------------------------------------------------------------------
1 | """
2 | Unit tests that verify FileCache storage works correctly.
3 | """
4 |
5 | import string
6 |
7 | from random import randint, sample
8 |
9 | import pytest
10 | import requests
11 | from cachecontrol import CacheControl
12 | from cachecontrol.caches import FileCache
13 |
14 | STORAGE_FOLDER = ".cache"
15 |
16 |
def randomdata():
    """Return a random querystring fragment of the form ``&key=val``."""
    key = ''.join(sample(string.ascii_lowercase, randint(2, 4)))
    value_chars = string.ascii_lowercase + string.digits
    val = ''.join(sample(value_chars, randint(2, 10)))
    return '&{0}={1}'.format(key, val)
23 |
24 |
class TestStorageFileCache(object):
    """Exercise FileCache through a live CacheControl session."""

    @pytest.fixture()
    def sess(self, server):
        self.url = server.application_url
        self.cache = FileCache(STORAGE_FOLDER)
        return CacheControl(requests.Session(), cache=self.cache)

    def test_filecache_from_cache(self, sess):
        # First hit populates the cache; the second must be served from it.
        first = sess.get(self.url)
        assert not first.from_cache
        second = sess.get(self.url)
        assert second.from_cache

    def test_key_length(self, sess):
        """
        Hash table keys:
        Most file systems have a 255 characters path limitation.
        * Make sure hash method does not produce too long keys
        * Ideally hash method generate fixed length keys
        """
        url0 = url1 = 'http://example.org/res?a=1'
        while len(url0) < 255:
            url0 += randomdata()
            url1 += randomdata()
        assert len(self.cache.encode(url0)) < 200
        assert len(self.cache.encode(url0)) == len(self.cache.encode(url1))
53 |
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | ====================
2 | Using CacheControl
3 | ====================
4 |
5 | CacheControl assumes you are using a `requests.Session` for your
6 | requests. If you are making ad-hoc requests using `requests.get` then
7 | you probably are not terribly concerned about caching.
8 |
There are two ways to use CacheControl: via the wrapper and via the
adapter.
11 |
12 |
13 | Wrapper
14 | =======
15 |
16 | The easiest way to use CacheControl is to utilize the basic
17 | wrapper. Here is an example: ::
18 |
19 | import requests
20 | import cachecontrol
21 |
22 | sess = cachecontrol.CacheControl(requests.Session())
23 | resp = sess.get('http://google.com')
24 |
25 | This uses the default cache store, a thread safe in-memory dictionary.
26 |
27 |
28 | Adapter
29 | =======
30 |
31 | The other way to use CacheControl is via a requests `Transport
32 | Adapter`_.
33 |
34 | Here is how the adapter works: ::
35 |
36 | import requests
37 | import cachecontrol
38 |
39 | sess = requests.Session()
    sess.mount('http://', cachecontrol.CacheControlAdapter())
41 |
42 | resp = sess.get('http://google.com')
43 |
44 |
45 | Under the hood, the wrapper method of using CacheControl mentioned
46 | above is the same as this example.
47 |
48 |
49 | Use a Different Cache Store
50 | ===========================
51 |
52 | Both the wrapper and adapter classes allow providing a custom cache
53 | store object for storing your cached data. Here is an example using
54 | the provided `FileCache` from CacheControl: ::
55 |
56 | import requests
57 |
58 | from cachecontrol import CacheControl
59 |
60 | # NOTE: This requires lockfile be installed
61 | from cachecontrol.caches import FileCache
62 |
63 | sess = CacheControl(requests.Session(),
64 | cache=FileCache('.webcache'))
65 |
66 |
67 | The `FileCache` will create a directory called `.webcache` and store a
68 | file for each cached request.
69 |
70 |
71 |
72 | .. _Transport Adapter: http://docs.python-requests.org/en/latest/user/advanced/#transport-adapters
73 |
--------------------------------------------------------------------------------
/tests/test_max_age.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import pytest
3 |
4 | from requests import Session
5 | from cachecontrol.adapter import CacheControlAdapter
6 | from cachecontrol.cache import DictCache
7 |
8 |
class NullSerializer(object):
    """Pass-through serializer: the response object itself is the stored form."""

    def dumps(self, request, response, body=None):
        # request and body are ignored; store the raw response untouched.
        return response

    def loads(self, request, data):
        # Whatever was stored comes back verbatim.
        return data
16 |
17 |
class TestMaxAge(object):
    """Behavior of the client-supplied ``Cache-Control: max-age`` directive."""

    @pytest.fixture()
    def sess(self, server):
        self.url = server.application_url
        self.cache = DictCache()
        sess = Session()
        sess.mount(
            'http://',
            # NullSerializer stores the raw response object directly, so the
            # tests can compare and mutate cached entries in place.
            CacheControlAdapter(self.cache, serializer=NullSerializer()),
        )
        return sess

    def test_client_max_age_0(self, sess):
        """
        Making sure when the client uses max-age=0 we don't get a
        cached copy even though we're still fresh.
        """
        print('first request')
        r = sess.get(self.url)
        # The raw response object itself is what got stored (NullSerializer).
        assert self.cache.get(self.url) == r.raw

        print('second request')
        r = sess.get(self.url, headers={'Cache-Control': 'max-age=0'})

        # don't remove from the cache
        assert self.cache.get(self.url)
        assert not r.from_cache

    def test_client_max_age_3600(self, sess):
        """
        Verify we get a cached value when the client has a
        reasonable max-age value.
        """
        r = sess.get(self.url)
        assert self.cache.get(self.url) == r.raw

        # request that we don't want a new one unless
        r = sess.get(self.url, headers={'Cache-Control': 'max-age=3600'})
        assert r.from_cache is True

        # now lets grab one that forces a new request b/c the cache
        # has expired. To do that we'll inject a new time value.
        # (Backdating the cached Date header makes the entry look stale.)
        resp = self.cache.get(self.url)
        resp.headers['date'] = 'Tue, 15 Nov 1994 08:12:31 GMT'
        r = sess.get(self.url)
        assert not r.from_cache
65 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. CacheControl documentation master file, created by
2 | sphinx-quickstart on Mon Nov 4 15:01:23 2013.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | Welcome to CacheControl's documentation!
7 | ========================================
8 |
9 | CacheControl is a port of the caching algorithms in httplib2_ for use with
10 | requests_ session object.
11 |
12 | It was written because httplib2's better support for caching is often
13 | mitigated by its lack of threadsafety. The same is true of requests in
14 | terms of caching.
15 |
16 |
17 | Install
18 | =======
19 |
20 | CacheControl is available from PyPI_. You can install it with pip_ ::
21 |
22 | $ pip install CacheControl
23 |
24 | Some of the included cache storage classes have external
25 | requirements. See :doc:`storage` for more info.
26 |
27 |
28 |
29 | Quick Start
30 | ===========
31 |
32 | For the impatient, here is how to get started using CacheControl ::
33 |
34 | import requests
35 |
36 | from cachecontrol import CacheControl
37 |
38 |
39 | sess = requests.session()
40 | cached_sess = CacheControl(sess)
41 |
42 | response = cached_sess.get('http://google.com')
43 |
44 |
45 | This uses a threadsafe in memory dictionary for storage.
46 |
47 |
48 | Tests
49 | =====
50 |
The tests are all in the ``tests`` directory and are runnable by py.test.
52 |
53 |
54 | Disclaimers
55 | ===========
56 |
CacheControl is relatively new and may have bugs. I have made an
effort to faithfully port the tests from httplib2 to CacheControl, but
59 | there is a decent chance that I've missed something. Please file bugs
60 | if you find any issues!
61 |
With that in mind, CacheControl has been used successfully in
production environments, replacing httplib2's usage.
64 |
65 | If you give it a try, please let me know of any issues.
66 |
67 |
68 | .. _httplib2: http://code.google.com/p/httplib2/
69 | .. _requests: http://docs.python-requests.org/
70 | .. _Editing the Web: http://www.w3.org/1999/04/Editing/
71 | .. _PyPI: https://pypi.python.org/pypi/CacheControl/
72 | .. _pip: http://www.pip-installer.org/
73 |
74 |
75 | Contents:
76 |
77 | .. toctree::
78 | :maxdepth: 2
79 |
80 | usage
81 | storage
82 | etags
83 | tips
84 |
85 |
86 |
87 | Indices and tables
88 | ==================
89 |
90 | * :ref:`genindex`
91 | * :ref:`modindex`
92 | * :ref:`search`
93 |
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | from pprint import pformat
2 |
3 | import pytest
4 |
5 | from webtest.http import StopableWSGIServer
6 |
7 |
class SimpleApp(object):
    """Tiny WSGI application backing the test suite.

    The first path segment selects a handler method of the same name
    (e.g. ``/etag`` -> ``self.etag``); unknown paths fall through to a
    default cacheable 200 response echoing the WSGI environ.
    """

    def __init__(self):
        # Counter used to build unique ETag values; see update_etag_string.
        self.etag_count = 0
        self.update_etag_string()

    def dispatch(self, env):
        # Map the first path segment to a method of the same name, if any.
        path = env['PATH_INFO'][1:].split('/')
        segment = path.pop(0)
        if segment and hasattr(self, segment):
            return getattr(self, segment)
        return None

    def vary_accept(self, env, start_response):
        """Cacheable response that varies on Accept-Encoding and Accept."""
        headers = [
            ('Cache-Control', 'max-age=5000'),
            ('Content-Type', 'text/plain'),
            ('Vary', 'Accept-Encoding, Accept'),
        ]
        start_response('200 OK', headers)
        return [pformat(env).encode("utf8")]

    def update_etag_string(self):
        # Bump the counter and regenerate the current (quoted) ETag value.
        self.etag_count += 1
        self.etag_string = '"ETAG-{0}"'.format(self.etag_count)

    def update_etag(self, env, start_response):
        """Rotate the server's ETag, then reply with a cacheable response."""
        self.update_etag_string()
        headers = [
            ('Cache-Control', 'max-age=5000'),
            ('Content-Type', 'text/plain'),
        ]
        start_response('200 OK', headers)
        return [pformat(env).encode("utf8")]

    def etag(self, env, start_response):
        """Answer 304 when the client's If-None-Match matches our ETag."""
        headers = [
            ('Etag', self.etag_string),
        ]
        if env.get('HTTP_IF_NONE_MATCH') == self.etag_string:
            start_response('304 Not Modified', headers)
        else:
            start_response('200 OK', headers)
        return [pformat(env).encode("utf8")]

    def __call__(self, env, start_response):
        func = self.dispatch(env)

        if func:
            return func(env, start_response)

        # Default: plain cacheable 200 echoing the environ.
        headers = [
            ('Cache-Control', 'max-age=5000'),
            ('Content-Type', 'text/plain'),
        ]
        start_response('200 OK', headers)
        return [pformat(env).encode("utf8")]
65 |
66 |
@pytest.fixture(scope='session')
def server():
    # The shared server instance is created once in pytest_namespace below
    # and exposed as pytest.server.
    return pytest.server


@pytest.fixture()
def url(server):
    # Base URL of the shared test server.
    return server.application_url


def pytest_namespace():
    # Start one WSGI server for the whole test run; tests reach it through
    # the `server`/`url` fixtures above.
    return dict(server=StopableWSGIServer.create(SimpleApp()))


def pytest_unconfigure(config):
    # Shut the shared server down when the test run ends.
    pytest.server.shutdown()
83 |
--------------------------------------------------------------------------------
/docs/tips.rst:
--------------------------------------------------------------------------------
1 | =========================
2 | Tips and Best Practices
3 | =========================
4 |
5 | Caching is hard! It is considered one of the great challenges of
computer science. Fortunately, the HTTP spec helps to navigate some
7 | pitfalls of invalidation using stale responses. Below are some
8 | suggestions and best practices to help avoid the more subtle issues
9 | that can crop up using CacheControl and HTTP caching.
10 |
If you have a suggestion, please create a new issue on `GitHub
<https://github.com/ionrock/cachecontrol>`_ and let folks know
13 | what you ran into and how you fixed it.
14 |
15 |
16 | Timezones
17 | =========
18 |
19 | It is important to remember that the times reported by a server may or
20 | may not be timezone aware. If you are using CacheControl with a
21 | service you control, make sure any timestamps are used consistently,
22 | especially if requests might cross timezones.
23 |
24 |
25 | Cached Responses
26 | ================
27 |
28 | We've done our best to make sure cached responses act like a normal
29 | response, but there are aspects that are different for somewhat
30 | obvious reasons.
31 |
32 | - Cached responses are never streaming
- Cached responses have `None` for the `raw` attribute
34 |
35 | Obviously, when you cache a response, you have downloaded the entire
36 | body. Therefore, there is never a use case for streaming a cached
37 | response.
38 |
39 | With that in mind, you should be aware that if you try to cache a very
40 | large response on a network store, you still might have some latency
transferring the data from the network store to your
42 | application. Another consideration is storing large responses in a
43 | `FileCache`. If you are caching using ETags and the server is
44 | extremely specific as to what constitutes an equivalent request, it
45 | could provide many different responses for essentially the same data
46 | within the context of your application.
47 |
48 |
49 | Query String Params
50 | ===================
51 |
52 | If you are caching requests that use a large number of query string
53 | parameters, consider sorting them to ensure that the request is
54 | properly cached.
55 |
56 | Requests supports passing both dictionaries and lists of tuples as the
57 | param argument in a request. For example: ::
58 |
59 | requests.get(url, params=sorted([('foo', 'one'), ('bar', 'two')]))
60 |
61 | By ordering your params, you can be sure the cache key will be
62 | consistent across requests and you are caching effectively.
63 |
--------------------------------------------------------------------------------
/cachecontrol/caches/file_cache.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import os
3 |
4 | from lockfile import FileLock
5 |
6 |
7 | def _secure_open_write(filename, fmode):
8 | # We only want to write to this file, so open it in write only mode
9 | flags = os.O_WRONLY
10 |
11 | # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
12 | # will open *new* files.
13 | # We specify this because we want to ensure that the mode we pass is the
14 | # mode of the file.
15 | flags |= os.O_CREAT | os.O_EXCL
16 |
17 | # Do not follow symlinks to prevent someone from making a symlink that
18 | # we follow and insecurely open a cache file.
19 | if hasattr(os, "O_NOFOLLOW"):
20 | flags |= os.O_NOFOLLOW
21 |
22 | # On Windows we'll mark this file as binary
23 | if hasattr(os, "O_BINARY"):
24 | flags |= os.O_BINARY
25 |
26 | # Before we open our file, we want to delete any existing file that is
27 | # there
28 | try:
29 | os.remove(filename)
30 | except (IOError, OSError):
31 | # The file must not exist already, so we can just skip ahead to opening
32 | pass
33 |
34 | # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
35 | # race condition happens between the os.remove and this line, that an
36 | # error will be raised. Because we utilize a lockfile this should only
37 | # happen if someone is attempting to attack us.
38 | fd = os.open(filename, flags, fmode)
39 | try:
40 | return os.fdopen(fd, "wb")
41 | except:
42 | # An error occurred wrapping our FD in a file object
43 | os.close(fd)
44 | raise
45 |
46 |
class FileCache(object):
    """Cache backend storing each entry as a file on disk.

    Writes are serialized with ``lockfile.FileLock`` and performed through
    a hardened open (O_CREAT|O_EXCL, no symlink following).

    Fix: delete() previously raised OSError/IOError when the key had never
    been stored (or was already removed); deleting a missing entry is now
    a no-op, matching DictCache's behavior.
    """

    def __init__(self, directory, forever=False, filemode=0o0600,
                 dirmode=0o0700):
        # directory: where cache files live (created if missing)
        # forever:   when True, delete() never removes entries
        # filemode:  permission bits for individual cache files
        # dirmode:   permission bits used when creating the directory
        self.directory = directory
        self.forever = forever
        self.filemode = filemode

        if not os.path.isdir(self.directory):
            os.makedirs(self.directory, dirmode)

    @staticmethod
    def encode(x):
        """Hash *x* into a fixed-length, filesystem-safe key."""
        return hashlib.sha224(x.encode()).hexdigest()

    def _fn(self, name):
        """Full on-disk path of the cache file for *name*."""
        return os.path.join(self.directory, self.encode(name))

    def get(self, key):
        """Return the cached bytes for *key*, or None when not present."""
        name = self._fn(key)
        if not os.path.exists(name):
            return None

        with open(name, 'rb') as fh:
            return fh.read()

    def set(self, key, value):
        """Write *value* (bytes) for *key* under a file lock."""
        name = self._fn(key)
        with FileLock(name) as lock:
            with _secure_open_write(lock.path, self.filemode) as fh:
                fh.write(value)

    def delete(self, key):
        """Remove *key* from the cache; a no-op when ``forever`` is set."""
        if not self.forever:
            try:
                os.remove(self._fn(key))
            except (IOError, OSError):
                # The key was never cached or is already gone.
                pass
82 |
--------------------------------------------------------------------------------
/tests/test_vary.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import requests
3 |
4 | from cachecontrol import CacheControl
5 | from cachecontrol.cache import DictCache
6 | from cachecontrol.compat import urljoin
7 |
8 |
class TestVary(object):
    """Tests for correct handling of the ``Vary`` response header."""

    @pytest.fixture()
    def sess(self, server):
        self.url = urljoin(server.application_url, '/vary_accept')
        self.cache = DictCache()
        return CacheControl(requests.Session(), cache=self.cache)

    def cached_equal(self, cached, resp):
        # Compare a deserialized cached response against a live
        # response, attribute by attribute.
        raw = resp.raw
        return (
            cached._fp.getvalue() == resp.content
            and cached.headers == raw.headers
            and cached.status == raw.status
            and cached.version == raw.version
            and cached.reason == raw.reason
            and cached.strict == raw.strict
            and cached.decode_content == raw.decode_content
        )

    def test_vary_example(self, sess):
        """RFC 2616 13.6

        A cache entry stored for a response that carried a Vary header
        may only be reused when every request header named by Vary has
        the same value in the new request as it had in the original
        one.  In short: if the server declares Vary, the cached value
        is used only when all the listed headers match.
        """
        serializer = sess.adapters["http://"].controller.serializer
        first = sess.get(self.url)
        cached = serializer.loads(first.request, self.cache.get(self.url))

        # The first response must have been stored.
        assert self.cached_equal(cached, first)

        # An identical request is answered from the cache.
        second = sess.get(self.url)
        assert self.cached_equal(cached, second)
        assert second.from_cache

        # Changing a header listed in Vary must bypass the cache.
        third = sess.get(self.url, headers={'Accept': 'text/plain, text/html'})
        assert not self.cached_equal(cached, third)
        assert not third.from_cache

        # Sanity checks: the server really used Vary, and it named
        # more than one header.  Headers we never send count as
        # matching, which is why the default request above still hit
        # the cache.
        assert 'vary' in first.headers
        assert len(first.headers['vary'].replace(' ', '').split(',')) == 2
72 |
--------------------------------------------------------------------------------
/docs/storage.rst:
--------------------------------------------------------------------------------
1 | ====================
2 | Storing Cache Data
3 | ====================
4 |
CacheControl comes with a few storage backends for storing your
cached objects.
7 |
8 |
9 | DictCache
10 | =========
11 |
12 | The `DictCache` is the default cache used when no other is
13 | provided. It is a simple threadsafe dictionary. It doesn't try to do
14 | anything smart about deadlocks or forcing a busted cache, but it
15 | should be reasonably safe to use.
16 |
Also, the `DictCache` does not transform the request or response
objects in any way. Therefore it is unlikely you could persist the
entire cache to disk. The converse is that it should be very fast.
20 |
21 |
22 | FileCache
23 | =========
24 |
25 | The `FileCache` is similar to the caching mechanism provided by
26 | httplib2_. It requires `lockfile`_ be installed as it prevents
27 | multiple threads from writing to the same file at the same time.
28 |
29 | Here is an example using the `FileCache`: ::
30 |
31 | import requests
32 | from cachecontrol import CacheControl
33 | from cachecontrol.caches import FileCache
34 |
35 | sess = CacheControl(requests.Session(),
36 | cache=FileCache('.web_cache'))
37 |
38 |
39 | The `FileCache` supports a `forever` flag that disables deleting from
40 | the cache. This can be helpful in debugging applications that make
41 | many web requests that you don't want to repeat. It also can be
42 | helpful in testing. Here is an example of how to use it: ::
43 |
44 | forever_cache = FileCache('.web_cache', forever=True)
45 | sess = CacheControl(requests.Session(), forever_cache)
46 |
47 |
48 | :A Note About Pickle:
49 |
50 | It should be noted that the `FileCache` uses pickle to store the
51 | cached response. Prior to `requests 2.1`_, `requests.Response`
52 | objects were not 'pickleable' due to the use of `IOBase` base
53 | classes in `urllib3` `HTTPResponse` objects. In CacheControl we work
54 | around this by patching the Response objects with the appropriate
55 | `__getstate__` and `__setstate__` methods when the requests version
56 | doesn't natively support Response pickling.
57 |
58 |
59 |
60 | RedisCache
61 | ==========
62 |
63 | The `RedisCache` uses a Redis database to store values. The values are
64 | stored as strings in redis, which means the get, set and delete
65 | actions are used.
66 |
67 | The `RedisCache` also provides a clear method to delete all keys in a
68 | database. Obviously, this should be used with caution as it is naive
69 | and works iteratively, looping over each key and deleting it.
70 |
71 | Here is an example using a `RedisCache`: ::
72 |
73 | import redis
74 | import requests
75 | from cachecontrol import CacheControl
76 | from cachecontrol.caches import RedisCache
77 |
78 |
79 | pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
80 | r = redis.Redis(connection_pool=pool)
81 | sess = CacheControl(requests.Session(), RedisCache(r))
82 |
83 | This is primarily a proof of concept, so please file bugs if there is
84 | a better method for utilizing redis as a cache.
85 |
86 |
87 | .. _httplib2: http://code.google.com/p/httplib2/
88 | .. _lockfile: https://github.com/smontanaro/pylockfile
89 | .. _requests 2.1: http://docs.python-requests.org/en/latest/community/updates/#id2
90 |
--------------------------------------------------------------------------------
/cachecontrol/adapter.py:
--------------------------------------------------------------------------------
1 | import functools
2 |
3 | from requests.adapters import HTTPAdapter
4 |
5 | from .controller import CacheController
6 | from .cache import DictCache
7 | from .filewrapper import CallbackFileWrapper
8 |
9 |
class CacheControlAdapter(HTTPAdapter):
    """Transport adapter that layers HTTP caching over ``HTTPAdapter``.

    GET responses are looked up in / stored into ``cache`` according
    to the decisions made by a :class:`CacheController`.
    """

    # Successful requests with these methods evict the cached entry
    # for the request URL (the resource was presumably modified).
    invalidating_methods = set(['PUT', 'DELETE'])

    def __init__(self, cache=None, cache_etags=True, controller_class=None,
                 serializer=None, *args, **kw):
        """
        :param cache: cache backend; defaults to an in-memory DictCache.
        :param cache_etags: when True, responses carrying an ETag are
            cached even without time-based freshness headers.
        :param controller_class: alternate CacheController factory.
        :param serializer: alternate (de)serializer for cached entries.

        Remaining args are forwarded to ``HTTPAdapter``.
        """
        super(CacheControlAdapter, self).__init__(*args, **kw)
        self.cache = cache or DictCache()

        controller_factory = controller_class or CacheController
        self.controller = controller_factory(
            self.cache,
            cache_etags=cache_etags,
            serializer=serializer,
        )

    def send(self, request, **kw):
        """
        Send a request. Use the request information to see if it
        exists in the cache and cache the response if we need to and can.
        """
        if request.method == 'GET':
            cached_response = self.controller.cached_request(request)
            if cached_response:
                # Fresh cache hit: answer without any network traffic.
                return self.build_response(request, cached_response, from_cache=True)

            # check for etags and add headers if appropriate
            # (If-None-Match / If-Modified-Since for a stale entry.)
            request.headers.update(self.controller.conditional_headers(request))

        resp = super(CacheControlAdapter, self).send(request, **kw)

        return resp

    def build_response(self, request, response, from_cache=False):
        """
        Build a response by making a request or using the cache.

        This will end up calling send and returning a potentially
        cached response
        """
        if not from_cache and request.method == 'GET':
            if response.status == 304:
                # We must have sent an ETag request. This could mean
                # that we've been expired already or that we simply
                # have an etag. In either case, we want to try and
                # update the cache if that is the case.
                cached_response = self.controller.update_cached_response(
                    request, response
                )

                # update_cached_response returns the stored response
                # when it found one; only then did we serve from cache.
                if cached_response is not response:
                    from_cache = True

                response = cached_response
            else:
                # Wrap the response file with a wrapper that will cache the
                # response when the stream has been consumed.
                response._fp = CallbackFileWrapper(
                    response._fp,
                    functools.partial(
                        self.controller.cache_response,
                        request,
                        response,
                    )
                )

        resp = super(CacheControlAdapter, self).build_response(
            request, response
        )

        # See if we should invalidate the cache.
        if request.method in self.invalidating_methods and resp.ok:
            cache_url = self.controller.cache_url(request.url)
            self.cache.delete(cache_url)

        # Give the request a from_cache attr to let people use it
        resp.from_cache = from_cache

        return resp
88 |
--------------------------------------------------------------------------------
/cachecontrol/serialize.py:
--------------------------------------------------------------------------------
1 | import io
2 |
3 | from requests.structures import CaseInsensitiveDict
4 |
5 | from .compat import HTTPResponse, pickle
6 |
7 |
class Serializer(object):
    """(De)serializes responses for storage in a cache backend.

    The stored format is a ``b"cc=N,"`` version prefix followed by a
    pickled dict holding the response attributes plus the request
    header values named by the response's Vary header.
    """

    def dumps(self, request, response, body=None):
        """Serialize ``response`` (paired with its ``request``) to bytes."""
        headers = CaseInsensitiveDict(response.headers)

        if body is None:
            # Consume the raw (undecoded) body, then replace the file
            # object so callers can still read the response afterwards.
            body = response.read(decode_content=False)
            response._fp = io.BytesIO(body)

        data = {
            "response": {
                "body": body,
                "headers": response.headers,
                "status": response.status,
                "version": response.version,
                "reason": response.reason,
                "strict": response.strict,
                "decode_content": response.decode_content,
            },
            "vary": {},
        }

        # Record the request header values named by Vary, so loads()
        # can decide whether a later request matches this entry.
        if "vary" in headers:
            for field in headers["vary"].split(","):
                field = field.strip()
                data["vary"][field] = request.headers.get(field, None)

        return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)

    def loads(self, request, data):
        """Deserialize ``data``; any failure is reported as None (miss)."""
        if not data:
            return

        # Split off the "cc=N" version prefix; data without a comma is
        # legacy version 0.
        try:
            ver, data = data.split(b",", 1)
        except ValueError:
            ver = b"cc=0"

        # A comma inside legacy data can masquerade as a prefix; if
        # what we split off is not a real version tag, undo the split.
        if ver[:3] != b"cc=":
            data = ver + data
            ver = b"cc=0"

        # Extract N from "cc=N".
        ver = ver.split(b"=", 1)[-1].decode("ascii")

        # Dispatch to the version-specific loader; an unknown version
        # is treated as a miss.
        try:
            loader = getattr(self, "_loads_v{0}".format(ver))
            return loader(request, data)
        except AttributeError:
            return

    def _loads_v0(self, request, data):
        # Version 0 entries do not carry enough information to rebuild
        # a response, so they are always a miss.
        return

    def _loads_v1(self, request, data):
        try:
            cached = pickle.loads(data)
        except ValueError:
            return

        vary = cached.get("vary", {})

        # "Vary: *" means no request can ever be proven equivalent.
        if "*" in vary:
            return

        # Every recorded Vary header must match the new request.
        for field, expected in vary.items():
            if request.headers.get(field, None) != expected:
                return

        body = io.BytesIO(cached["response"].pop("body"))
        return HTTPResponse(body=body, preload_content=False,
                            **cached["response"])
97 |
--------------------------------------------------------------------------------
/tests/test_etag.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import requests
3 |
4 | from cachecontrol import CacheControl
5 | from cachecontrol.cache import DictCache
6 | from cachecontrol.compat import urljoin
7 |
8 |
class NullSerializer(object):
    """Pass-through serializer: stores and returns objects untouched,
    letting tests inspect raw cached responses directly.
    """

    def dumps(self, request, response, body=None):
        # Keep the response object itself instead of encoding it.
        return response

    def loads(self, request, data):
        # Whatever was stored comes back unchanged.
        return data
16 |
17 |
class TestETag(object):
    """Exercise "equal priority" caching, where a response carrying an
    ETag is cached on its own, independently of (orthogonally to) any
    time-based caching headers.
    """

    @pytest.fixture()
    def sess(self, server):
        self.etag_url = urljoin(server.application_url, '/etag')
        self.update_etag_url = urljoin(server.application_url, '/update_etag')
        self.cache = DictCache()
        return CacheControl(
            requests.Session(),
            cache=self.cache,
            serializer=NullSerializer(),
        )

    def test_etags_get_example(self, sess, server):
        """RFC 2616 14.26

        A server may attach an ETag to a response.  On later requests
        the client echoes that value in an If-None-Match header; when
        the entity is unchanged the server answers 304 Not Modified
        and the client keeps using its current representation, which
        saves transferring the body again.
        """
        first = sess.get(self.etag_url)

        # The response was stored under its URL.
        assert self.cache.get(self.etag_url) == first.raw

        # A repeat request is answered from the cache.
        repeat = sess.get(self.etag_url)
        assert repeat.raw == first.raw
        assert repeat.from_cache

        # Ask the server to rotate its ETag, invalidating our copy.
        sess.get(self.update_etag_url)

        fresh = sess.get(self.etag_url)
        assert fresh != first
        assert not fresh.from_cache

        # The cache now holds the newly ETag'd response.
        assert self.cache.get(self.etag_url) == fresh.raw
82 |
83 |
class TestDisabledETags(object):
    """With ``cache_etags=False`` (time-priority behaviour), ETags only
    come into play once a cached response has gone stale.
    """

    @pytest.fixture()
    def sess(self, server):
        self.etag_url = urljoin(server.application_url, '/etag')
        self.update_etag_url = urljoin(server.application_url, '/update_etag')
        self.cache = DictCache()
        return CacheControl(
            requests.Session(),
            cache=self.cache,
            cache_etags=False,
            serializer=NullSerializer(),
        )

    def test_expired_etags_if_none_match_response(self, sess):
        """A stale cached response that carries an ETag must trigger a
        conditional request via the If-None-Match header.
        """
        response = sess.get(self.etag_url)

        # The endpoint sends no time-based caching headers, so we
        # backdate the cached copy ourselves to force expiry.
        response.headers['Date'] = 'Tue, 26 Nov 2012 00:50:49 GMT'
        self.cache.set(self.etag_url, response)

        response = sess.get(self.etag_url)
        assert response.from_cache
        assert 'if-none-match' in response.request.headers
        assert response.status_code == 200
116 |
--------------------------------------------------------------------------------
/docs/etags.rst:
--------------------------------------------------------------------------------
1 | ==============
2 | ETag Support
3 | ==============
4 |
5 | CacheControl's support of ETags is slightly different than
6 | httplib2. In httplib2, an ETag is considered when using a cached
7 | response when the cache is considered stale. When a cached response is
8 | expired and it has an ETag header, it returns a response with the
9 | appropriate `If-None-Match` header. We'll call this behavior a **Time
10 | Priority** cache as the ETag support only takes effect when the time has
11 | expired.
12 |
In CacheControl the default behavior when an ETag is sent by the
server is to cache the response. We'll refer to this pattern as an
**Equal Priority** cache, as the decision to cache is either time based
or due to the presence of an ETag.
17 |
18 | The spec is not explicit what takes priority when caching with both
19 | ETags and time based headers. Therefore, CacheControl supports the
20 | different mechanisms via configuration where possible.
21 |
22 |
23 | Turning Off Equal Priority Caching
24 | ==================================
25 |
26 | The danger in Equal Priority Caching is that a server that returns
27 | ETag headers for every request may fill up your cache. You can disable
28 | Equal Priority Caching and utilize a Time Priority algorithm like
29 | httplib2. ::
30 |
31 | import requests
32 | from cachecontrol import CacheControl
33 |
34 | sess = CacheControl(requests.Session(), cache_etags=False)
35 |
This will only utilize ETags when they exist within the context of
time based caching headers. If a response has time based caching
headers that are valid along with an ETag, we will still attempt to
handle a 304 Not Modified even though the cached value has
expired. Here is a simple example. ::
41 |
42 | # Server response
43 | GET /foo.html
44 | Date: Tue, 26 Nov 2013 00:50:49 GMT
45 | Cache-Control: max-age=3000
46 | ETag: JAsUYM8K
47 |
48 | On a subsequent request, if the cache has expired, the next request
49 | will still include the `If-None-Match` header. The cached response
50 | will remain in the cache awaiting the response. ::
51 |
52 | # Client request
53 | GET /foo.html
54 | If-None-Match: JAsUYM8K
55 |
56 | If the server returns a `304 Not Modified`, it will use the stale
57 | cached value, updating the headers from the most recent request. ::
58 |
59 | # Server response
60 | GET /foo.html
61 | Date: Tue, 26 Nov 2013 01:30:19 GMT
62 | Cache-Control: max-age=3000
63 | ETag: JAsUYM8K
64 |
65 | If the server returns a `200 OK`, the cache will be updated
66 | accordingly.
67 |
68 |
69 | Equal Priority Caching Benefits
70 | ===============================
71 |
72 | The benefits of equal priority caching is that you have two orthogonal
73 | means of introducing a cache. The time based cache provides an
74 | effective way to reduce the load on requests that can be eventually
75 | consistent. Static resource are a great example of when time based
76 | caching is effective.
77 |
The ETag based cache is effective for working with documents that are
larger and/or need to be correct immediately after changes. For
example, if you exported some data from a large database, the file
could be 10 GB. Being able to send an ETag with this sort of request
and know that the version you have locally is valid saves a ton of
bandwidth and time.
84 |
Likewise, if you have a resource that you want to update, you can be
confident there will not be a `lost update`_ because you will know
when your local version is stale.
88 |
89 |
90 | Endpoint Specific Caching
91 | =========================
92 |
93 | It should be pointed out that there are times when an endpoint is
94 | specifically tailored for different caching techniques. If you have a
95 | RESTful service, there might be endpoints that are specifically meant
96 | to be cached via time based caching techniques where as other
97 | endpoints should focus on using ETags. In this situation it is
98 | recommended that you use the `CacheControlAdapter` directly. ::
99 |
100 | import requests
101 | from cachecontrol import CacheControlAdapter
102 | from cachecontrol.caches import RedisCache
103 |
104 | # using django for an idea on where you might get a
105 | # username/password.
106 | from django.conf import settings
107 |
108 | # a function to return a redis connection all the instances of the
109 | # app may use. this allows updates to the API (ie PUT) to invalidate
110 | # the cache for other users.
111 | from myapp.db import redis_connection
112 |
113 |
114 | # create our session
    client = requests.Session(auth=(settings.user, settings.password))
116 |
117 | # we have a gettext like endpoint. this doesn't get updated very
118 | # often so a time based cache is a helpful way to reduce many small
119 | # requests.
120 | client.mount('http://myapi.foo.com/gettext/',
121 | CacheControlAdapter(cache_etags=False))
122 |
123 |
124 | # here we have user profile endpoint that lets us update information
125 | # about users. we need this to be consistent immediately after a user
126 | # updates some information because another node might handle the
127 | # request. It uses the global redis cache to coordinate the cache and
128 | # uses the equal priority caching to be sure etags are used by default.
129 | redis_cache = RedisCache(redis_connection())
130 | client.mount('http://myapi.foo.com/user_profiles/',
131 | CacheControlAdapter(cache=redis_cache))
132 |
133 | Hopefully this more indepth example reveals how to configure a
134 | `requests.Session` to better utilize ETag based caching vs. Time
135 | Priority Caching.
136 |
137 | .. _lost update: http://www.w3.org/1999/04/Editing/
138 |
--------------------------------------------------------------------------------
/tests/test_cache_control.py:
--------------------------------------------------------------------------------
1 | """
2 | Unit tests that verify our caching methods work correctly.
3 | """
4 | import pytest
5 | from mock import ANY, Mock
6 | import time
7 |
8 | from cachecontrol import CacheController
9 | from cachecontrol.cache import DictCache
10 |
11 |
12 | TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
13 |
14 |
class NullSerializer(object):
    """Identity serializer used so tests see raw objects in the cache."""

    def dumps(self, request, response):
        # No encoding: the response object is the stored value.
        return response

    def loads(self, request, data):
        # No decoding: return the stored value as-is.
        return data
22 |
23 |
class TestCacheControllerResponse(object):
    """Unit tests for ``CacheController.cache_response``."""

    url = 'http://url.com/'

    def req(self, headers=None):
        # Mock request; full_url mirrors requests < 1.x attribute names.
        return Mock(full_url=self.url,
                    url=self.url,
                    headers=headers if headers is not None else {})

    def resp(self, headers=None):
        # Mock urllib3-style response with a readable byte body.
        return Mock(status=200,
                    headers=headers if headers is not None else {},
                    request=self.req(),
                    read=lambda **k: b"testing")

    @pytest.fixture()
    def cc(self):
        # Controller wired to a mock cache and mock serializer so we
        # can observe what gets stored.
        return CacheController(Mock(), serializer=Mock())

    def test_no_cache_non_20x_response(self, cc):
        # max-age plus a date would normally make this cacheable...
        now = time.strftime(TIME_FMT, time.gmtime())
        resp = self.resp({'cache-control': 'max-age=3600',
                          'date': now})

        # ...but statuses outside the 20x range must never be stored.
        for code in [201, 300, 400, 500]:
            resp.status = code
            cc.cache_response(Mock(), resp)
            assert not cc.cache.set.called

        # A 20x status, however, is cached.
        resp.status = 203
        cc.cache_response(self.req(), resp)
        assert cc.serializer.dumps.called
        assert cc.cache.set.called

    def test_no_cache_with_no_date(self, cc):
        # Without a Date header, max-age has no reference point, so
        # the response must not be cached.
        resp = self.resp({'cache-control': 'max-age=3600'})
        cc.cache_response(self.req(), resp)

        assert not cc.cache.set.called

    def test_cache_response_no_cache_control(self, cc):
        # No caching headers at all: nothing gets stored.
        cc.cache_response(self.req(), self.resp())

        assert not cc.cache.set.called

    def test_cache_response_cache_max_age(self, cc):
        now = time.strftime(TIME_FMT, time.gmtime())
        resp = self.resp({'cache-control': 'max-age=3600',
                          'date': now})
        req = self.req()
        cc.cache_response(req, resp)
        # The serialized response is stored under the request URL.
        cc.serializer.dumps.assert_called_with(req, resp, body=None)
        cc.cache.set.assert_called_with(self.url, ANY)

    def test_cache_repsonse_no_store(self):
        cached = Mock()
        cc = CacheController(DictCache({self.url: cached}))

        cache_url = cc.cache_url(self.url)
        assert cc.cache.get(cache_url)

        # A no-store response must evict any existing entry.
        cc.cache_response(self.req(), self.resp({'cache-control': 'no-store'}))
        assert not cc.cache.get(cache_url)
97 |
98 |
class TestCacheControlRequest(object):
    """Unit tests for ``CacheController.cached_request``."""

    url = 'http://foo.com/bar'

    def setup(self):
        self.c = CacheController(
            DictCache(),
            serializer=NullSerializer(),
        )

    def req(self, headers):
        # Run a mock request carrying ``headers`` through the
        # controller's cache lookup.
        return self.c.cached_request(Mock(url=self.url, headers=headers))

    def _prime(self, headers):
        # Seed the cache with a mock response carrying ``headers``.
        resp = Mock(headers=headers)
        self.c.cache = DictCache({self.url: resp})
        return resp

    def test_cache_request_no_cache(self):
        assert not self.req({'cache-control': 'no-cache'})

    def test_cache_request_pragma_no_cache(self):
        assert not self.req({'pragma': 'no-cache'})

    def test_cache_request_no_store(self):
        assert not self.req({'cache-control': 'no-store'})

    def test_cache_request_max_age_0(self):
        assert not self.req({'cache-control': 'max-age=0'})

    def test_cache_request_not_in_cache(self):
        assert not self.req({})

    def test_cache_request_fresh_max_age(self):
        now = time.strftime(TIME_FMT, time.gmtime())
        resp = self._prime({'cache-control': 'max-age=3600',
                            'date': now})
        assert self.req({}) == resp

    def test_cache_request_unfresh_max_age(self):
        # Dated 1h01m40s ago, which is past the one-hour max-age.
        stale = time.strftime(TIME_FMT, time.gmtime(time.time() - 3700))
        self._prime({'cache-control': 'max-age=3600',
                     'date': stale})
        assert not self.req({})

    def test_cache_request_fresh_expires(self):
        # Expires one day in the future.
        expires = time.strftime(TIME_FMT, time.gmtime(time.time() + 86400))
        now = time.strftime(TIME_FMT, time.gmtime())
        resp = self._prime({'expires': expires,
                            'date': now})
        assert self.req({}) == resp

    def test_cache_request_unfresh_expires(self):
        # Expired one day in the past.
        expires = time.strftime(TIME_FMT, time.gmtime(time.time() - 86400))
        now = time.strftime(TIME_FMT, time.gmtime())
        self._prime({'expires': expires,
                     'date': now})
        assert not self.req({})
171 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # Internal variables.
11 | PAPEROPT_a4 = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | # the i18n builder cannot share the environment and doctrees with the others
15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
16 |
17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
18 |
19 | help:
20 | @echo "Please use \`make ' where is one of"
21 | @echo " html to make standalone HTML files"
22 | @echo " dirhtml to make HTML files named index.html in directories"
23 | @echo " singlehtml to make a single large HTML file"
24 | @echo " pickle to make pickle files"
25 | @echo " json to make JSON files"
26 | @echo " htmlhelp to make HTML files and a HTML help project"
27 | @echo " qthelp to make HTML files and a qthelp project"
28 | @echo " devhelp to make HTML files and a Devhelp project"
29 | @echo " epub to make an epub"
30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
31 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
32 | @echo " text to make text files"
33 | @echo " man to make manual pages"
34 | @echo " texinfo to make Texinfo files"
35 | @echo " info to make Texinfo files and run them through makeinfo"
36 | @echo " gettext to make PO message catalogs"
37 | @echo " changes to make an overview of all changed/added/deprecated items"
38 | @echo " linkcheck to check all external links for integrity"
39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
40 |
41 | clean:
42 | -rm -rf $(BUILDDIR)/*
43 |
44 | html:
45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
46 | @echo
47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
48 |
49 | dirhtml:
50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
51 | @echo
52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
53 |
54 | singlehtml:
55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
56 | @echo
57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
58 |
59 | pickle:
60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
61 | @echo
62 | @echo "Build finished; now you can process the pickle files."
63 |
64 | json:
65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
66 | @echo
67 | @echo "Build finished; now you can process the JSON files."
68 |
69 | htmlhelp:
70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
71 | @echo
72 | @echo "Build finished; now you can run HTML Help Workshop with the" \
73 | ".hhp project file in $(BUILDDIR)/htmlhelp."
74 |
75 | qthelp:
76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
77 | @echo
78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/CacheControl.qhcp"
81 | @echo "To view the help file:"
82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/CacheControl.qhc"
83 |
84 | devhelp:
85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
86 | @echo
87 | @echo "Build finished."
88 | @echo "To view the help file:"
89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/CacheControl"
90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/CacheControl"
91 | @echo "# devhelp"
92 |
93 | epub:
94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
95 | @echo
96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
97 |
98 | latex:
99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | @echo
101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
103 | "(use \`make latexpdf' here to do that automatically)."
104 |
105 | latexpdf:
106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
107 | @echo "Running LaTeX files through pdflatex..."
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 |
111 | text:
112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
113 | @echo
114 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
115 |
116 | man:
117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
118 | @echo
119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 |
121 | texinfo:
122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
123 | @echo
124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
125 | @echo "Run \`make' in that directory to run these through makeinfo" \
126 | "(use \`make info' here to do that automatically)."
127 |
# Texinfo sources plus a recursive make to produce GNU Info pages.
# NOTE: recursive invocations must use $(MAKE), not a literal `make`,
# so that command-line flags and the jobserver propagate (and to match
# the `latexpdf` target above).
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 |
# Extract translatable messages into gettext catalogs.
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

# Overview of all changed/added/deprecated items for this release.
changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

# Check the integrity of all external links in the documentation.
linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

# Run doctest snippets embedded in the documentation.
doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
#
# CacheControl documentation build configuration file, created by
# sphinx-quickstart on Mon Nov 4 15:01:23 2013.
#
# This file is execfile()d with the current directory set to its containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.

# One import per line (PEP 8).  Only needed if the sys.path tweak below
# is uncommented, but kept so that uncommenting it Just Works.
import os
import sys

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#sys.path.insert(0, os.path.abspath('.'))

# -- General configuration -----------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix of source filenames.
source_suffix = '.rst'

# The encoding of source files.
#source_encoding = 'utf-8-sig'

# The master toctree document.
master_doc = 'index'

# General information about the project.
project = u'CacheControl'
copyright = u'2013, Eric Larson'

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '0.6'
# The full version, including alpha/beta/rc tags.
release = '0.6'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#today = ''
# Else, today_fmt is used as the format for a strftime call.
#today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build']

# The reST default role (used for this markup: `text`) to use for all documents.
#default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
#modindex_common_prefix = []


# -- Options for HTML output ---------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
#html_title = None

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
# using the given strftime format.
#html_last_updated_fmt = '%b %d, %Y'

# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
#html_sidebars = {}

# Additional templates that should be rendered to pages, maps page names to
# template names.
#html_additional_pages = {}

# If false, no module index is generated.
#html_domain_indices = True

# If false, no index is generated.
#html_use_index = True

# If true, the index is split into individual pages for each letter.
#html_split_index = False

# If true, links to the reST sources are added to the pages.
#html_show_sourcelink = True

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#html_show_sphinx = True

# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
#html_show_copyright = True

# If true, an OpenSearch description file will be output, and all pages will
# contain a tag referring to it. The value of this option must be the
# base URL from which the finished HTML is served.
#html_use_opensearch = ''

# This is the file name suffix for HTML files (e.g. ".xhtml").
#html_file_suffix = None

# Output file base name for HTML help builder.
htmlhelp_basename = 'CacheControldoc'


# -- Options for LaTeX output --------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #'preamble': '',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    ('index', 'CacheControl.tex', u'CacheControl Documentation',
     u'Eric Larson', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output --------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    ('index', 'cachecontrol', u'CacheControl Documentation',
     [u'Eric Larson'], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output ------------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    ('index', 'CacheControl', u'CacheControl Documentation',
     u'Eric Larson', 'CacheControl', 'One line description of project.',
     'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'
--------------------------------------------------------------------------------
/cachecontrol/controller.py:
--------------------------------------------------------------------------------
1 | """
2 | The httplib2 algorithms ported for use with requests.
3 | """
4 | import re
5 | import calendar
6 | import time
7 |
8 | from requests.structures import CaseInsensitiveDict
9 |
10 | from .cache import DictCache
11 | from .compat import parsedate_tz
12 | from .serialize import Serializer
13 |
14 |
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Split *uri* into its RFC 3986 components.

    Uses the regex from Appendix B of RFC 3986 and returns the tuple
    ``(scheme, authority, path, query, fragment)``; components that are
    absent from the URI come back as ``None`` (path may be ``''``).
    """
    match = URI.match(uri)
    # Groups 2/4/7/9 are the inner captures (without the ':' / '//' /
    # '?' / '#' delimiters); group 5 is the path.
    scheme, authority, path, query, fragment = match.group(2, 4, 5, 7, 9)
    return (scheme, authority, path, query, fragment)
25 |
26 |
class CacheController(object):
    """An interface to see if a request should be cached or not.

    Ports the httplib2 caching algorithms onto a pluggable ``cache``
    backend and ``serializer``.
    """
    def __init__(self, cache=None, cache_etags=True, serializer=None):
        # cache: mapping-like backend with get/set/delete (DictCache default)
        self.cache = cache or DictCache()
        # cache_etags: when True, any response carrying an ETag is stored
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()

    def _urlnorm(self, uri):
        """Normalize the URL to create a safe key for the cache.

        Lowercases scheme/authority, defaults an empty path to "/" and
        drops the fragment.  Raises for non-absolute URIs.
        """
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)

        # Only lowercase once (the original lowered scheme twice).
        scheme = scheme.lower()
        authority = authority.lower()

        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    def cache_url(self, uri):
        """Return the cache key for *uri* (the normalized URL)."""
        return self._urlnorm(uri)

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.

        Valueless directives (e.g. ``no-cache``) map to ``1``; directives
        with an argument (e.g. ``max-age=300``) map to the argument as a
        lowercased *string* -- callers must convert to int themselves.
        """
        retval = {}

        # *headers* may be a plain (case-sensitive) dict, so probe both
        # common spellings of the header name.
        cc_header = 'cache-control'
        if 'Cache-Control' in headers:
            cc_header = 'Cache-Control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")]
            parts_wo_args = [(name.strip().lower(), 1)
                             for name in parts if -1 == name.find("=")]
            retval = dict(parts_with_args + parts_wo_args)
        return retval

    def cached_request(self, request):
        """Return a cached response for *request* if it is fresh enough.

        Returns ``False`` when there is nothing usable in the cache and
        the request should go to the network (possibly with conditional
        headers -- see :meth:`conditional_headers`).
        """
        cache_url = self.cache_url(request.url)
        cc = self.parse_cache_control(request.headers)

        # Non-caching states requested by the client.  NOTE: directive
        # values from parse_cache_control are strings, so max-age must be
        # converted before comparing with zero (the old `== 0` comparison
        # against the string could never be true).
        no_cache = 'no-cache' in cc
        if 'max-age' in cc:
            try:
                if int(cc['max-age']) == 0:
                    no_cache = True
            except ValueError:
                # Ignore an unparseable request max-age here; the
                # freshness computation below treats it as 0.
                pass

        # Bail out if no-cache was set
        if no_cache:
            return False

        # It is in the cache, so lets see if it is going to be
        # fresh enough
        resp = self.serializer.loads(request, self.cache.get(cache_url))

        # Check to see if we have a cached object
        if not resp:
            return False

        headers = CaseInsensitiveDict(resp.headers)

        # Responses stored via the etag path may lack a Date header.
        # Without one we cannot compute an age (the timegm call below
        # would crash), so force revalidation instead.
        if 'date' not in headers:
            return False

        now = time.time()
        date = calendar.timegm(
            parsedate_tz(headers['date'])
        )
        current_age = max(0, now - date)

        # TODO: There is an assumption that the result will be a
        # urllib3 response object. This may not be best since we
        # could probably avoid instantiating or constructing the
        # response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0
        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
            freshness_lifetime = int(resp_cc['max-age'])
        elif 'expires' in headers:
            expires = parsedate_tz(headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)

        # determine if we are setting freshness limit in the req
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
            except ValueError:
                freshness_lifetime = 0

        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # adjust our current age by our min fresh
            current_age += min_fresh

        # see how fresh we actually are
        fresh = (freshness_lifetime > current_age)

        if fresh:
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if 'etag' not in headers:
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request):
        """Build validator headers from the cached response, if any.

        Returns a dict with ``If-None-Match`` / ``If-Modified-Since``
        (empty when there is nothing cached to validate against).
        """
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers

    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        if response.status not in [200, 203]:
            return

        response_headers = CaseInsensitiveDict(response.headers)

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)

        # Delete it from the cache if we happen to have it stored there
        no_store = cc.get('no-store') or cc_req.get('no-store')
        if no_store and self.cache.get(cache_url):
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in response_headers:
            # cache when there is a max-age > 0.  Guard with isdigit()
            # (as cached_request does) so a malformed directive is
            # ignored rather than raising ValueError.
            if cc and cc.get('max-age'):
                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(
            request, self.cache.get(cache_url))

        if not cached_response:
            # we didn't have a cached response
            return response

        # did so lets update our headers
        cached_response.headers.update(response.headers)

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response
245 |
--------------------------------------------------------------------------------