├── setup.cfg ├── .bumpversion.cfg ├── dev_requirements.txt ├── .gitignore ├── tox.ini ├── .travis.yml ├── CONTRIBUTORS.rst ├── cachecontrol ├── __init__.py ├── wrapper.py ├── caches │ ├── __init__.py │ ├── redis_cache.py │ └── file_cache.py ├── compat.py ├── cache.py ├── filewrapper.py ├── adapter.py ├── serialize.py └── controller.py ├── tests ├── test_storage_redis.py ├── test_adapter.py ├── test_storage_filecache.py ├── test_max_age.py ├── test_vary.py ├── test_etag.py └── test_cache_control.py ├── tasks.py ├── setup.py ├── README.rst ├── examples └── benchmark.py ├── docs ├── usage.rst ├── index.rst ├── tips.rst ├── storage.rst ├── etags.rst ├── Makefile └── conf.py └── conftest.py /setup.cfg: -------------------------------------------------------------------------------- 1 | [pytest] 2 | norecursedirs = bin lib include build -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.9.3 3 | files = setup.py 4 | commit = True 5 | tag = True 6 | 7 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 
2 | 3 | tox 4 | pytest 5 | mock 6 | webtest 7 | sphinx 8 | redis 9 | lockfile 10 | bumpversion 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | *.pyo 4 | *.egg-info/* 5 | dist 6 | bin 7 | lib 8 | lib64 9 | include 10 | .Python 11 | docs/_build 12 | build/ 13 | .tox -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py33 3 | 4 | [testenv] 5 | deps = pytest 6 | mock 7 | webtest 8 | redis 9 | lockfile 10 | commands = py.test -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | env: 4 | - TOXENV=py26 5 | - TOXENV=py27 6 | - TOXENV=py32 7 | - TOXENV=py33 8 | 9 | install: "pip install tox" 10 | 11 | script: tox 12 | -------------------------------------------------------------------------------- /CONTRIBUTORS.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Contributors 3 | ============== 4 | 5 | Huge thanks to all those folks who have helped improve CacheControl! 6 | 7 | - Toby White 8 | - Ian Cordasco 9 | - Cory Benfield 10 | - Javier de la Rosa 11 | -------------------------------------------------------------------------------- /cachecontrol/__init__.py: -------------------------------------------------------------------------------- 1 | """CacheControl import Interface. 2 | 3 | Make it easy to import from cachecontrol without long namespaces. 
4 | """ 5 | from .wrapper import CacheControl 6 | from .adapter import CacheControlAdapter 7 | from .controller import CacheController 8 | -------------------------------------------------------------------------------- /tests/test_storage_redis.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from mock import Mock 4 | from cachecontrol.caches import RedisCache 5 | 6 | 7 | class TestRedisCache(object): 8 | 9 | def setup(self): 10 | self.conn = Mock() 11 | self.cache = RedisCache(self.conn) 12 | 13 | def test_set_expiration(self): 14 | self.cache.set('foo', 'bar', expires=datetime(2014, 2, 2)) 15 | assert self.conn.setex.called 16 | -------------------------------------------------------------------------------- /cachecontrol/wrapper.py: -------------------------------------------------------------------------------- 1 | from .adapter import CacheControlAdapter 2 | from .cache import DictCache 3 | 4 | 5 | def CacheControl(sess, cache=None, cache_etags=True, serializer=None): 6 | cache = cache or DictCache() 7 | adapter = CacheControlAdapter( 8 | cache, 9 | cache_etags=cache_etags, 10 | serializer=serializer, 11 | ) 12 | sess.mount('http://', adapter) 13 | sess.mount('https://', adapter) 14 | 15 | return sess 16 | -------------------------------------------------------------------------------- /cachecontrol/caches/__init__.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | try: 4 | from .file_cache import FileCache 5 | except ImportError: 6 | notice = dedent(''' 7 | NOTE: In order to use the FileCache you must have 8 | lockfile installed. 
You can install it via pip: 9 | pip install lockfile 10 | ''') 11 | print(notice) 12 | 13 | 14 | try: 15 | import redis 16 | from .redis_cache import RedisCache 17 | except ImportError: 18 | pass 19 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from invoke import run, task 2 | 3 | 4 | VENV = 'venv' 5 | 6 | 7 | def env_do(tail, **kw): 8 | return run('%s/bin/%s' % (VENV, tail), **kw) 9 | 10 | 11 | @task 12 | def virtualenv(): 13 | run('virtualenv %s' % VENV) 14 | 15 | 16 | @task('virtualenv') 17 | def bootstrap(): 18 | env_do('pip install -r dev_requirements.txt') 19 | 20 | 21 | @task 22 | def clean_env(): 23 | run('rm -r venv') 24 | 25 | 26 | @task 27 | def test_all(): 28 | env_do('tox') 29 | 30 | 31 | @task 32 | def docs(): 33 | run('cd docs && make html') 34 | 35 | 36 | @task 37 | def release(part): 38 | env_do('bumpversion %s' % part) 39 | run('git push origin master') 40 | run('git push --tags origin master') 41 | env_do('python setup.py sdist upload') 42 | -------------------------------------------------------------------------------- /cachecontrol/compat.py: -------------------------------------------------------------------------------- 1 | try: 2 | from urllib.parse import urljoin 3 | except ImportError: 4 | from urlparse import urljoin 5 | 6 | 7 | try: 8 | import email.utils 9 | parsedate_tz = email.utils.parsedate_tz 10 | except ImportError: 11 | import email.Utils 12 | parsedate_tz = email.Utils.parsedate_tz 13 | 14 | 15 | try: 16 | import cPickle as pickle 17 | except ImportError: 18 | import pickle 19 | 20 | 21 | # Handle the case where the requests has been patched to not have urllib3 22 | # bundled as part of it's source. 
23 | try: 24 | from requests.packages.urllib3.response import HTTPResponse 25 | except ImportError: 26 | from urllib3.response import HTTPResponse 27 | 28 | try: 29 | from requests.packages.urllib3.util import is_fp_closed 30 | except ImportError: 31 | from urllib3.util import is_fp_closed 32 | -------------------------------------------------------------------------------- /cachecontrol/cache.py: -------------------------------------------------------------------------------- 1 | """ 2 | The cache object API for implementing caches. The default is just a 3 | dictionary, which in turns means it is not threadsafe for writing. 4 | """ 5 | from threading import Lock 6 | 7 | 8 | class BaseCache(object): 9 | 10 | def get(self, key): 11 | raise NotImplemented() 12 | 13 | def set(self, key, value): 14 | raise NotImplemented() 15 | 16 | def delete(self, key): 17 | raise NotImplemented() 18 | 19 | 20 | class DictCache(BaseCache): 21 | 22 | def __init__(self, init_dict=None): 23 | self.lock = Lock() 24 | self.data = init_dict or {} 25 | 26 | def get(self, key): 27 | return self.data.get(key, None) 28 | 29 | def set(self, key, value): 30 | with self.lock: 31 | self.data.update({key: value}) 32 | 33 | def delete(self, key): 34 | with self.lock: 35 | if key in self.data: 36 | self.data.pop(key) 37 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | long_description = open('README.rst').read() 4 | 5 | VERSION = '0.9.3' 6 | 7 | setup_params = dict( 8 | name='CacheControl', 9 | version=VERSION, 10 | author='Eric Larson', 11 | author_email='eric@ionrock.org', 12 | license='MIT', 13 | url='https://github.com/ionrock/cachecontrol', 14 | keywords='requests http caching web', 15 | packages=setuptools.find_packages(), 16 | description='httplib2 caching for requests', 17 | long_description=long_description, 18 | install_requires=[ 19 | 
'requests', 20 | ], 21 | classifiers=[ 22 | 'Development Status :: 4 - Beta', 23 | 'Environment :: Web Environment', 24 | 'License :: OSI Approved :: MIT License', 25 | 'Operating System :: OS Independent', 26 | 'Programming Language :: Python', 27 | 'Topic :: Internet :: WWW/HTTP', 28 | ], 29 | ) 30 | 31 | 32 | if __name__ == '__main__': 33 | setuptools.setup(**setup_params) 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | CacheControl 3 | ============== 4 | 5 | CacheControl is a port of the caching algorithms in httplib2_ for use with 6 | requests_ session object. 7 | 8 | It was written because httplib2's better support for caching is often 9 | mitigated by its lack of threadsafety. The same is true of requests in 10 | terms of caching. 11 | 12 | 13 | Quickstart 14 | ========== 15 | 16 | :: 17 | 18 | import requests 19 | 20 | from cachecontrol import CacheControl 21 | 22 | 23 | sess = requests.session() 24 | cached_sess = CacheControl(sess) 25 | 26 | response = cached_sess.get('http://google.com') 27 | 28 | If the URL contains any caching based headers, it will cache the 29 | result in a simple dictionary. 30 | 31 | For more info, check out the docs_ 32 | 33 | .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master 34 | :target: https://travis-ci.org/ionrock/cachecontrol 35 | 36 | .. _docs: http://cachecontrol.readthedocs.org/en/latest/ 37 | .. _httplib2: http://code.google.com/p/httplib2/ 38 | .. 
_requests: http://docs.python-requests.org/ 39 | -------------------------------------------------------------------------------- /cachecontrol/filewrapper.py: -------------------------------------------------------------------------------- 1 | from .compat import is_fp_closed 2 | 3 | 4 | class CallbackFileWrapper(object): 5 | """ 6 | Small wrapper around a fp object which will tee everything read into a 7 | buffer, and when that file is closed it will execute a callback with the 8 | contents of that buffer. 9 | 10 | All attributes are proxied to the underlying file object. 11 | 12 | This class uses members with a double underscore (__) leading prefix so as 13 | not to accidentally shadow an attribute. 14 | """ 15 | 16 | def __init__(self, fp, callback): 17 | self.__buf = b"" 18 | self.__fp = fp 19 | self.__callback = callback 20 | 21 | def __getattr__(self, name): 22 | return getattr(self.__fp, name) 23 | 24 | def read(self, amt=None): 25 | data = self.__fp.read(amt) 26 | self.__buf += data 27 | 28 | # Is this the best way to figure out if the file has been completely 29 | # consumed? 
30 | if is_fp_closed(self.__fp): 31 | self.__callback(self.__buf) 32 | 33 | return data 34 | -------------------------------------------------------------------------------- /cachecontrol/caches/redis_cache.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from datetime import datetime 4 | 5 | 6 | def total_seconds(td): 7 | """Python 2.6 compatability""" 8 | if hasattr(td, 'total_seconds'): 9 | return td.total_seconds() 10 | 11 | ms = td.microseconds 12 | secs = (td.seconds + td.days * 24 * 3600) 13 | return (ms + secs * 10**6) / 10**6 14 | 15 | 16 | class RedisCache(object): 17 | 18 | def __init__(self, conn): 19 | self.conn = conn 20 | 21 | def get(self, key): 22 | return self.conn.get(key) 23 | 24 | def set(self, key, value, expires=None): 25 | if not expires: 26 | self.conn.set(key, value) 27 | else: 28 | expires = expires - datetime.now() 29 | self.conn.setex(key, total_seconds(expires), value) 30 | 31 | def delete(self, key): 32 | self.conn.delete(key) 33 | 34 | def clear(self): 35 | """Helper for clearing all the keys in a database. 
Use with 36 | caution!""" 37 | for key in self.conn.keys(): 38 | self.conn.delete(key) 39 | -------------------------------------------------------------------------------- /tests/test_adapter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from requests import Session 4 | from cachecontrol.adapter import CacheControlAdapter 5 | from cachecontrol.wrapper import CacheControl 6 | 7 | 8 | def use_wrapper(): 9 | print('Using helper') 10 | sess = CacheControl(Session()) 11 | return sess 12 | 13 | 14 | def use_adapter(): 15 | print('Using adapter') 16 | sess = Session() 17 | sess.mount('http://', CacheControlAdapter()) 18 | return sess 19 | 20 | 21 | @pytest.fixture(params=[use_adapter, use_wrapper]) 22 | def sess(url, request): 23 | sess = request.param() 24 | sess.get(url) 25 | return sess 26 | 27 | 28 | class TestSessionActions(object): 29 | 30 | def test_get_caches(self, url, sess): 31 | r2 = sess.get(url) 32 | assert r2.from_cache is True 33 | 34 | def test_get_with_no_cache_does_not_cache(self, url, sess): 35 | r2 = sess.get(url, headers={'Cache-Control': 'no-cache'}) 36 | assert not r2.from_cache 37 | 38 | def test_put_invalidates_cache(self, url, sess): 39 | r2 = sess.put(url, data={'foo': 'bar'}) 40 | sess.get(url) 41 | assert not r2.from_cache 42 | 43 | def test_delete_invalidates_cache(self, url, sess): 44 | r2 = sess.delete(url) 45 | sess.get(url) 46 | assert not r2.from_cache 47 | -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import requests 3 | import argparse 4 | 5 | from multiprocessing import Process 6 | from datetime import datetime 7 | from wsgiref.simple_server import make_server 8 | from cachecontrol import CacheControl 9 | 10 | HOST = 'localhost' 11 | PORT = 8050 12 | URL = 'http://{0}:{1}/'.format(HOST, PORT) 13 | 14 | 15 | class 
Server(object): 16 | 17 | def __call__(self, env, sr): 18 | body = 'Hello World!' 19 | status = '200 OK' 20 | headers = [ 21 | ('Cache-Control', 'max-age=%i' % (60 * 10)), 22 | ('Content-Type', 'text/plain'), 23 | ] 24 | sr(status, headers) 25 | return body 26 | 27 | 28 | def start_server(): 29 | httpd = make_server(HOST, PORT, Server()) 30 | httpd.serve_forever() 31 | 32 | 33 | def run_benchmark(sess): 34 | proc = Process(target=start_server) 35 | proc.start() 36 | 37 | start = datetime.now() 38 | for i in xrange(0, 1000): 39 | sess.get(URL) 40 | sys.stdout.write('.') 41 | end = datetime.now() 42 | print() 43 | 44 | total = end - start 45 | print('Total time for 1000 requests: %s' % total) 46 | proc.terminate() 47 | 48 | 49 | def run(): 50 | parser = argparse.ArgumentParser() 51 | parser.add_argument('-n', '--no-cache', 52 | default=False, 53 | action='store_true', 54 | help='Do not use cachecontrol') 55 | args = parser.parse_args() 56 | 57 | sess = requests.Session() 58 | if not args.no_cache: 59 | sess = CacheControl(sess) 60 | 61 | run_benchmark(sess) 62 | 63 | 64 | if __name__ == '__main__': 65 | run() 66 | -------------------------------------------------------------------------------- /tests/test_storage_filecache.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests that verify FileCache storage works correctly. 
3 | """ 4 | 5 | import string 6 | 7 | from random import randint, sample 8 | 9 | import pytest 10 | import requests 11 | from cachecontrol import CacheControl 12 | from cachecontrol.caches import FileCache 13 | 14 | STORAGE_FOLDER = ".cache" 15 | 16 | 17 | def randomdata(): 18 | """Plain random http data generator:""" 19 | key = ''.join(sample(string.ascii_lowercase, randint(2, 4))) 20 | val = ''.join(sample(string.ascii_lowercase + string.digits, 21 | randint(2, 10))) 22 | return '&{0}={1}'.format(key, val) 23 | 24 | 25 | class TestStorageFileCache(object): 26 | 27 | @pytest.fixture() 28 | def sess(self, server): 29 | self.url = server.application_url 30 | self.cache = FileCache(STORAGE_FOLDER) 31 | sess = CacheControl(requests.Session(), cache=self.cache) 32 | return sess 33 | 34 | def test_filecache_from_cache(self, sess): 35 | response = sess.get(self.url) 36 | assert not response.from_cache 37 | response = sess.get(self.url) 38 | assert response.from_cache 39 | 40 | def test_key_length(self, sess): 41 | """ 42 | Hash table keys: 43 | Most file systems have a 255 characters path limitation. 44 | * Make sure hash method does not produce too long keys 45 | * Ideally hash method generate fixed length keys 46 | """ 47 | url0 = url1 = 'http://example.org/res?a=1' 48 | while len(url0) < 255: 49 | url0 += randomdata() 50 | url1 += randomdata() 51 | assert len(self.cache.encode(url0)) < 200 52 | assert len(self.cache.encode(url0)) == len(self.cache.encode(url1)) 53 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Using CacheControl 3 | ==================== 4 | 5 | CacheControl assumes you are using a `requests.Session` for your 6 | requests. If you are making ad-hoc requests using `requests.get` then 7 | you probably are not terribly concerned about caching. 
8 | 9 | There are two way to use CacheControl, via the wrapper and the 10 | adapter. 11 | 12 | 13 | Wrapper 14 | ======= 15 | 16 | The easiest way to use CacheControl is to utilize the basic 17 | wrapper. Here is an example: :: 18 | 19 | import requests 20 | import cachecontrol 21 | 22 | sess = cachecontrol.CacheControl(requests.Session()) 23 | resp = sess.get('http://google.com') 24 | 25 | This uses the default cache store, a thread safe in-memory dictionary. 26 | 27 | 28 | Adapter 29 | ======= 30 | 31 | The other way to use CacheControl is via a requests `Transport 32 | Adapter`_. 33 | 34 | Here is how the adapter works: :: 35 | 36 | import requests 37 | import cachecontrol 38 | 39 | sess = requests.Session() 40 | sess.mount('http://', CacheControlAdapter()) 41 | 42 | resp = sess.get('http://google.com') 43 | 44 | 45 | Under the hood, the wrapper method of using CacheControl mentioned 46 | above is the same as this example. 47 | 48 | 49 | Use a Different Cache Store 50 | =========================== 51 | 52 | Both the wrapper and adapter classes allow providing a custom cache 53 | store object for storing your cached data. Here is an example using 54 | the provided `FileCache` from CacheControl: :: 55 | 56 | import requests 57 | 58 | from cachecontrol import CacheControl 59 | 60 | # NOTE: This requires lockfile be installed 61 | from cachecontrol.caches import FileCache 62 | 63 | sess = CacheControl(requests.Session(), 64 | cache=FileCache('.webcache')) 65 | 66 | 67 | The `FileCache` will create a directory called `.webcache` and store a 68 | file for each cached request. 69 | 70 | 71 | 72 | .. 
_Transport Adapter: http://docs.python-requests.org/en/latest/user/advanced/#transport-adapters 73 | -------------------------------------------------------------------------------- /tests/test_max_age.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | 4 | from requests import Session 5 | from cachecontrol.adapter import CacheControlAdapter 6 | from cachecontrol.cache import DictCache 7 | 8 | 9 | class NullSerializer(object): 10 | 11 | def dumps(self, request, response, body=None): 12 | return response 13 | 14 | def loads(self, request, data): 15 | return data 16 | 17 | 18 | class TestMaxAge(object): 19 | 20 | @pytest.fixture() 21 | def sess(self, server): 22 | self.url = server.application_url 23 | self.cache = DictCache() 24 | sess = Session() 25 | sess.mount( 26 | 'http://', 27 | CacheControlAdapter(self.cache, serializer=NullSerializer()), 28 | ) 29 | return sess 30 | 31 | def test_client_max_age_0(self, sess): 32 | """ 33 | Making sure when the client uses max-age=0 we don't get a 34 | cached copy even though we're still fresh. 35 | """ 36 | print('first request') 37 | r = sess.get(self.url) 38 | assert self.cache.get(self.url) == r.raw 39 | 40 | print('second request') 41 | r = sess.get(self.url, headers={'Cache-Control': 'max-age=0'}) 42 | 43 | # don't remove from the cache 44 | assert self.cache.get(self.url) 45 | assert not r.from_cache 46 | 47 | def test_client_max_age_3600(self, sess): 48 | """ 49 | Verify we get a cached value when the client has a 50 | reasonable max-age value. 51 | """ 52 | r = sess.get(self.url) 53 | assert self.cache.get(self.url) == r.raw 54 | 55 | # request that we don't want a new one unless 56 | r = sess.get(self.url, headers={'Cache-Control': 'max-age=3600'}) 57 | assert r.from_cache is True 58 | 59 | # now lets grab one that forces a new request b/c the cache 60 | # has expired. To do that we'll inject a new time value. 
61 | resp = self.cache.get(self.url) 62 | resp.headers['date'] = 'Tue, 15 Nov 1994 08:12:31 GMT' 63 | r = sess.get(self.url) 64 | assert not r.from_cache 65 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. CacheControl documentation master file, created by 2 | sphinx-quickstart on Mon Nov 4 15:01:23 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to CacheControl's documentation! 7 | ======================================== 8 | 9 | CacheControl is a port of the caching algorithms in httplib2_ for use with 10 | requests_ session object. 11 | 12 | It was written because httplib2's better support for caching is often 13 | mitigated by its lack of threadsafety. The same is true of requests in 14 | terms of caching. 15 | 16 | 17 | Install 18 | ======= 19 | 20 | CacheControl is available from PyPI_. You can install it with pip_ :: 21 | 22 | $ pip install CacheControl 23 | 24 | Some of the included cache storage classes have external 25 | requirements. See :doc:`storage` for more info. 26 | 27 | 28 | 29 | Quick Start 30 | =========== 31 | 32 | For the impatient, here is how to get started using CacheControl :: 33 | 34 | import requests 35 | 36 | from cachecontrol import CacheControl 37 | 38 | 39 | sess = requests.session() 40 | cached_sess = CacheControl(sess) 41 | 42 | response = cached_sess.get('http://google.com') 43 | 44 | 45 | This uses a threadsafe in memory dictionary for storage. 46 | 47 | 48 | Tests 49 | ===== 50 | 51 | The tests are all in cachecontrol/tests and is runnable by py.test. 52 | 53 | 54 | Disclaimers 55 | =========== 56 | 57 | CacheControl is relatively new and maybe have bugs. 
I have made an 58 | effort to faithfully port the tests from httplib2 to CacheControl, but 59 | there is a decent chance that I've missed something. Please file bugs 60 | if you find any issues! 61 | 62 | With that in mind, CacheControl has been used sucessfully in a 63 | production environments, replacing httplib2's usage. 64 | 65 | If you give it a try, please let me know of any issues. 66 | 67 | 68 | .. _httplib2: http://code.google.com/p/httplib2/ 69 | .. _requests: http://docs.python-requests.org/ 70 | .. _Editing the Web: http://www.w3.org/1999/04/Editing/ 71 | .. _PyPI: https://pypi.python.org/pypi/CacheControl/ 72 | .. _pip: http://www.pip-installer.org/ 73 | 74 | 75 | Contents: 76 | 77 | .. toctree:: 78 | :maxdepth: 2 79 | 80 | usage 81 | storage 82 | etags 83 | tips 84 | 85 | 86 | 87 | Indices and tables 88 | ================== 89 | 90 | * :ref:`genindex` 91 | * :ref:`modindex` 92 | * :ref:`search` 93 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | from pprint import pformat 2 | 3 | import pytest 4 | 5 | from webtest.http import StopableWSGIServer 6 | 7 | 8 | class SimpleApp(object): 9 | 10 | def __init__(self): 11 | self.etag_count = 0 12 | self.update_etag_string() 13 | 14 | def dispatch(self, env): 15 | path = env['PATH_INFO'][1:].split('/') 16 | segment = path.pop(0) 17 | if segment and hasattr(self, segment): 18 | return getattr(self, segment) 19 | return None 20 | 21 | def vary_accept(self, env, start_response): 22 | headers = [ 23 | ('Cache-Control', 'max-age=5000'), 24 | ('Content-Type', 'text/plain'), 25 | ('Vary', 'Accept-Encoding, Accept'), 26 | ] 27 | start_response('200 OK', headers) 28 | return [pformat(env).encode("utf8")] 29 | 30 | def update_etag_string(self): 31 | self.etag_count += 1 32 | self.etag_string = '"ETAG-{0}"'.format(self.etag_count) 33 | 34 | def update_etag(self, env, start_response): 35 | 
self.update_etag_string() 36 | headers = [ 37 | ('Cache-Control', 'max-age=5000'), 38 | ('Content-Type', 'text/plain'), 39 | ] 40 | start_response('200 OK', headers) 41 | return [pformat(env).encode("utf8")] 42 | 43 | def etag(self, env, start_response): 44 | headers = [ 45 | ('Etag', self.etag_string), 46 | ] 47 | if env.get('HTTP_IF_NONE_MATCH') == self.etag_string: 48 | start_response('304 Not Modified', headers) 49 | else: 50 | start_response('200 OK', headers) 51 | return [pformat(env).encode("utf8")] 52 | 53 | def __call__(self, env, start_response): 54 | func = self.dispatch(env) 55 | 56 | if func: 57 | return func(env, start_response) 58 | 59 | headers = [ 60 | ('Cache-Control', 'max-age=5000'), 61 | ('Content-Type', 'text/plain'), 62 | ] 63 | start_response('200 OK', headers) 64 | return [pformat(env).encode("utf8")] 65 | 66 | 67 | @pytest.fixture(scope='session') 68 | def server(): 69 | return pytest.server 70 | 71 | 72 | @pytest.fixture() 73 | def url(server): 74 | return server.application_url 75 | 76 | 77 | def pytest_namespace(): 78 | return dict(server=StopableWSGIServer.create(SimpleApp())) 79 | 80 | 81 | def pytest_unconfigure(config): 82 | pytest.server.shutdown() 83 | -------------------------------------------------------------------------------- /docs/tips.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | Tips and Best Practices 3 | ========================= 4 | 5 | Caching is hard! It is considered one of the great challenges of 6 | computer science. Fortunatley, the HTTP spec helps to navigate some 7 | pitfalls of invalidation using stale responses. Below are some 8 | suggestions and best practices to help avoid the more subtle issues 9 | that can crop up using CacheControl and HTTP caching. 10 | 11 | If you have a suggetions please create a new issue in `github 12 | `_ and let folks know 13 | what you ran into and how you fixed it. 
14 | 15 | 16 | Timezones 17 | ========= 18 | 19 | It is important to remember that the times reported by a server may or 20 | may not be timezone aware. If you are using CacheControl with a 21 | service you control, make sure any timestamps are used consistently, 22 | especially if requests might cross timezones. 23 | 24 | 25 | Cached Responses 26 | ================ 27 | 28 | We've done our best to make sure cached responses act like a normal 29 | response, but there are aspects that are different for somewhat 30 | obvious reasons. 31 | 32 | - Cached responses are never streaming 33 | - Cached repsonses have `None` for the `raw` attribute 34 | 35 | Obviously, when you cache a response, you have downloaded the entire 36 | body. Therefore, there is never a use case for streaming a cached 37 | response. 38 | 39 | With that in mind, you should be aware that if you try to cache a very 40 | large response on a network store, you still might have some latency 41 | tranferring the data from the network store to your 42 | application. Another consideration is storing large responses in a 43 | `FileCache`. If you are caching using ETags and the server is 44 | extremely specific as to what constitutes an equivalent request, it 45 | could provide many different responses for essentially the same data 46 | within the context of your application. 47 | 48 | 49 | Query String Params 50 | =================== 51 | 52 | If you are caching requests that use a large number of query string 53 | parameters, consider sorting them to ensure that the request is 54 | properly cached. 55 | 56 | Requests supports passing both dictionaries and lists of tuples as the 57 | param argument in a request. For example: :: 58 | 59 | requests.get(url, params=sorted([('foo', 'one'), ('bar', 'two')])) 60 | 61 | By ordering your params, you can be sure the cache key will be 62 | consistent across requests and you are caching effectively. 
63 | -------------------------------------------------------------------------------- /cachecontrol/caches/file_cache.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | 4 | from lockfile import FileLock 5 | 6 | 7 | def _secure_open_write(filename, fmode): 8 | # We only want to write to this file, so open it in write only mode 9 | flags = os.O_WRONLY 10 | 11 | # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only 12 | # will open *new* files. 13 | # We specify this because we want to ensure that the mode we pass is the 14 | # mode of the file. 15 | flags |= os.O_CREAT | os.O_EXCL 16 | 17 | # Do not follow symlinks to prevent someone from making a symlink that 18 | # we follow and insecurely open a cache file. 19 | if hasattr(os, "O_NOFOLLOW"): 20 | flags |= os.O_NOFOLLOW 21 | 22 | # On Windows we'll mark this file as binary 23 | if hasattr(os, "O_BINARY"): 24 | flags |= os.O_BINARY 25 | 26 | # Before we open our file, we want to delete any existing file that is 27 | # there 28 | try: 29 | os.remove(filename) 30 | except (IOError, OSError): 31 | # The file must not exist already, so we can just skip ahead to opening 32 | pass 33 | 34 | # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a 35 | # race condition happens between the os.remove and this line, that an 36 | # error will be raised. Because we utilize a lockfile this should only 37 | # happen if someone is attempting to attack us. 
38 | fd = os.open(filename, flags, fmode) 39 | try: 40 | return os.fdopen(fd, "wb") 41 | except: 42 | # An error occurred wrapping our FD in a file object 43 | os.close(fd) 44 | raise 45 | 46 | 47 | class FileCache(object): 48 | def __init__(self, directory, forever=False, filemode=0o0600, 49 | dirmode=0o0700): 50 | self.directory = directory 51 | self.forever = forever 52 | self.filemode = filemode 53 | 54 | if not os.path.isdir(self.directory): 55 | os.makedirs(self.directory, dirmode) 56 | 57 | @staticmethod 58 | def encode(x): 59 | return hashlib.sha224(x.encode()).hexdigest() 60 | 61 | def _fn(self, name): 62 | return os.path.join(self.directory, self.encode(name)) 63 | 64 | def get(self, key): 65 | name = self._fn(key) 66 | if not os.path.exists(name): 67 | return None 68 | 69 | with open(name, 'rb') as fh: 70 | return fh.read() 71 | 72 | def set(self, key, value): 73 | name = self._fn(key) 74 | with FileLock(name) as lock: 75 | with _secure_open_write(lock.path, self.filemode) as fh: 76 | fh.write(value) 77 | 78 | def delete(self, key): 79 | name = self._fn(key) 80 | if not self.forever: 81 | os.remove(name) 82 | -------------------------------------------------------------------------------- /tests/test_vary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import requests 3 | 4 | from cachecontrol import CacheControl 5 | from cachecontrol.cache import DictCache 6 | from cachecontrol.compat import urljoin 7 | 8 | 9 | class TestVary(object): 10 | 11 | @pytest.fixture() 12 | def sess(self, server): 13 | self.url = urljoin(server.application_url, '/vary_accept') 14 | self.cache = DictCache() 15 | sess = CacheControl(requests.Session(), cache=self.cache) 16 | return sess 17 | 18 | def cached_equal(self, cached, resp): 19 | checks = [ 20 | cached._fp.getvalue() == resp.content, 21 | cached.headers == resp.raw.headers, 22 | cached.status == resp.raw.status, 23 | cached.version == resp.raw.version, 24 | 
cached.reason == resp.raw.reason, 25 | cached.strict == resp.raw.strict, 26 | cached.decode_content == resp.raw.decode_content, 27 | ] 28 | return all(checks) 29 | 30 | def test_vary_example(self, sess): 31 | """RFC 2616 13.6 32 | 33 | When the cache receives a subsequent request whose Request-URI 34 | specifies one or more cache entries including a Vary header field, 35 | the cache MUST NOT use such a cache entry to construct a response 36 | to the new request unless all of the selecting request-headers 37 | present in the new request match the corresponding stored 38 | request-headers in the original request. 39 | 40 | Or, in simpler terms, when you make a request and the server 41 | returns defines a Vary header, unless all the headers listed 42 | in the Vary header are the same, it won't use the cached 43 | value. 44 | """ 45 | s = sess.adapters["http://"].controller.serializer 46 | r = sess.get(self.url) 47 | c = s.loads(r.request, self.cache.get(self.url)) 48 | 49 | # make sure we cached it 50 | assert self.cached_equal(c, r) 51 | 52 | # make the same request 53 | resp = sess.get(self.url) 54 | assert self.cached_equal(c, resp) 55 | assert resp.from_cache 56 | 57 | # make a similar request, changing the accept header 58 | resp = sess.get(self.url, headers={'Accept': 'text/plain, text/html'}) 59 | assert not self.cached_equal(c, resp) 60 | assert not resp.from_cache 61 | 62 | # Just confirming two things here: 63 | # 64 | # 1) The server used the vary header 65 | # 2) We have more than one header we vary on 66 | # 67 | # The reason for this is that when we don't specify the header 68 | # in the request, it is considered the same in terms of 69 | # whether or not to use the cached value. 
70 | assert 'vary' in r.headers 71 | assert len(r.headers['vary'].replace(' ', '').split(',')) == 2 72 | -------------------------------------------------------------------------------- /docs/storage.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Storing Cache Data 3 | ==================== 4 | 5 | CacheControl comes with a few storage backends for storing your 6 | cache'd objects. 7 | 8 | 9 | DictCache 10 | ========= 11 | 12 | The `DictCache` is the default cache used when no other is 13 | provided. It is a simple threadsafe dictionary. It doesn't try to do 14 | anything smart about deadlocks or forcing a busted cache, but it 15 | should be reasonably safe to use. 16 | 17 | Also, the `DictCache` does not transform the request or response 18 | objects in anyway. Therefore it is unlikely you could persist the 19 | entire cache to disk. The converse is that it should be very fast. 20 | 21 | 22 | FileCache 23 | ========= 24 | 25 | The `FileCache` is similar to the caching mechanism provided by 26 | httplib2_. It requires `lockfile`_ be installed as it prevents 27 | multiple threads from writing to the same file at the same time. 28 | 29 | Here is an example using the `FileCache`: :: 30 | 31 | import requests 32 | from cachecontrol import CacheControl 33 | from cachecontrol.caches import FileCache 34 | 35 | sess = CacheControl(requests.Session(), 36 | cache=FileCache('.web_cache')) 37 | 38 | 39 | The `FileCache` supports a `forever` flag that disables deleting from 40 | the cache. This can be helpful in debugging applications that make 41 | many web requests that you don't want to repeat. It also can be 42 | helpful in testing. 
Here is an example of how to use it: :: 43 | 44 | forever_cache = FileCache('.web_cache', forever=True) 45 | sess = CacheControl(requests.Session(), forever_cache) 46 | 47 | 48 | :A Note About Pickle: 49 | 50 | It should be noted that the `FileCache` uses pickle to store the 51 | cached response. Prior to `requests 2.1`_, `requests.Response` 52 | objects were not 'pickleable' due to the use of `IOBase` base 53 | classes in `urllib3` `HTTPResponse` objects. In CacheControl we work 54 | around this by patching the Response objects with the appropriate 55 | `__getstate__` and `__setstate__` methods when the requests version 56 | doesn't natively support Response pickling. 57 | 58 | 59 | 60 | RedisCache 61 | ========== 62 | 63 | The `RedisCache` uses a Redis database to store values. The values are 64 | stored as strings in redis, which means the get, set and delete 65 | actions are used. 66 | 67 | The `RedisCache` also provides a clear method to delete all keys in a 68 | database. Obviously, this should be used with caution as it is naive 69 | and works iteratively, looping over each key and deleting it. 70 | 71 | Here is an example using a `RedisCache`: :: 72 | 73 | import redis 74 | import requests 75 | from cachecontrol import CacheControl 76 | from cachecontrol.caches import RedisCache 77 | 78 | 79 | pool = redis.ConnectionPool(host='localhost', port=6379, db=0) 80 | r = redis.Redis(connection_pool=pool) 81 | sess = CacheControl(requests.Session(), RedisCache(r)) 82 | 83 | This is primarily a proof of concept, so please file bugs if there is 84 | a better method for utilizing redis as a cache. 85 | 86 | 87 | .. _httplib2: http://code.google.com/p/httplib2/ 88 | .. _lockfile: https://github.com/smontanaro/pylockfile 89 | .. 
_requests 2.1: http://docs.python-requests.org/en/latest/community/updates/#id2 90 | -------------------------------------------------------------------------------- /cachecontrol/adapter.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from requests.adapters import HTTPAdapter 4 | 5 | from .controller import CacheController 6 | from .cache import DictCache 7 | from .filewrapper import CallbackFileWrapper 8 | 9 | 10 | class CacheControlAdapter(HTTPAdapter): 11 | invalidating_methods = set(['PUT', 'DELETE']) 12 | 13 | def __init__(self, cache=None, cache_etags=True, controller_class=None, 14 | serializer=None, *args, **kw): 15 | super(CacheControlAdapter, self).__init__(*args, **kw) 16 | self.cache = cache or DictCache() 17 | 18 | controller_factory = controller_class or CacheController 19 | self.controller = controller_factory( 20 | self.cache, 21 | cache_etags=cache_etags, 22 | serializer=serializer, 23 | ) 24 | 25 | def send(self, request, **kw): 26 | """ 27 | Send a request. Use the request information to see if it 28 | exists in the cache and cache the response if we need to and can. 29 | """ 30 | if request.method == 'GET': 31 | cached_response = self.controller.cached_request(request) 32 | if cached_response: 33 | return self.build_response(request, cached_response, from_cache=True) 34 | 35 | # check for etags and add headers if appropriate 36 | request.headers.update(self.controller.conditional_headers(request)) 37 | 38 | resp = super(CacheControlAdapter, self).send(request, **kw) 39 | 40 | return resp 41 | 42 | def build_response(self, request, response, from_cache=False): 43 | """ 44 | Build a response by making a request or using the cache. 45 | 46 | This will end up calling send and returning a potentially 47 | cached response 48 | """ 49 | if not from_cache and request.method == 'GET': 50 | if response.status == 304: 51 | # We must have sent an ETag request. 
import io

from requests.structures import CaseInsensitiveDict

from .compat import HTTPResponse, pickle


class Serializer(object):
    """(De)serialize cached urllib3 responses.

    The wire format is a small version prefix (``cc=N,``) followed by a
    pickled dict holding the raw response fields plus the request header
    values named by the response's ``Vary`` header, so ``loads`` can
    refuse a cached entry whose varied headers don't match a new request.
    """

    def dumps(self, request, response, body=None):
        # Case-insensitive view used for the Vary lookup below.
        response_headers = CaseInsensitiveDict(response.headers)

        if body is None:
            # Consume the raw (undecoded) body and replace the response's
            # file object so callers can still read it afterwards.
            body = response.read(decode_content=False)
            response._fp = io.BytesIO(body)

        data = {
            "response": {
                "body": body,
                "headers": response.headers,
                "status": response.status,
                "version": response.version,
                "reason": response.reason,
                "strict": response.strict,
                "decode_content": response.decode_content,
            },
        }

        # Record the request header values this response varies on, so a
        # later request must present the same values to get a cache hit.
        data["vary"] = {}
        if "vary" in response_headers:
            varied_headers = response_headers['vary'].split(',')
            for header in varied_headers:
                header = header.strip()
                data["vary"][header] = request.headers.get(header, None)

        return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)

    def loads(self, request, data):
        """Deserialize *data* for *request*; return ``None`` on any miss."""
        # Short circuit if we've been given an empty set of data
        if not data:
            return

        # Determine what version of the serializer the data was
        # serialized with.
        try:
            ver, data = data.split(b",", 1)
        except ValueError:
            ver = b"cc=0"

        # Make sure that our "ver" is actually a version and isn't a
        # false positive from a , being in the data stream.
        if ver[:3] != b"cc=":
            data = ver + data
            ver = b"cc=0"

        # Get the version number out of the cc=N
        ver = ver.split(b"=", 1)[-1].decode("ascii")

        # Dispatch to the actual load method for the given version
        try:
            return getattr(self, "_loads_v{0}".format(ver))(request, data)
        except AttributeError:
            # This is a version we don't have a loads function for, so
            # we'll just treat it as a miss and return None.
            return

    def _loads_v0(self, request, data):
        # The original legacy cache data. This doesn't contain enough
        # information to construct everything we need, so we'll treat
        # this as a miss.
        return

    def _loads_v1(self, request, data):
        try:
            cached = pickle.loads(data)
        except (ValueError, EOFError, IndexError, ImportError,
                AttributeError, pickle.UnpicklingError):
            # pickle.loads can raise any of these on truncated or
            # otherwise corrupt data (see the pickle docs).  A bad cache
            # entry should be a cache miss, not a crash.
            return

        # Special case the '*' Vary value as it means we cannot actually
        # determine if the cached response is suitable for this request.
        if "*" in cached.get("vary", {}):
            return

        # Ensure that the Vary headers for the cached response match our
        # request.
        for header, value in cached.get("vary", {}).items():
            if request.headers.get(header, None) != value:
                return

        body = io.BytesIO(cached["response"].pop("body"))
        return HTTPResponse(
            body=body,
            preload_content=False,
            **cached["response"]
        )
The purpose of this feature is to 45 | allow efficient updates of cached information with a minimum amount 46 | of transaction overhead 47 | 48 | If any of the entity tags match the entity tag of the entity that 49 | would have been returned in the response to a similar GET request 50 | (without the If-None-Match header) on that resource, [...] then 51 | the server MUST NOT perform the requested method, [...]. Instead, if 52 | the request method was GET or HEAD, the server SHOULD respond with 53 | a 304 (Not Modified) response, including the cache-related header 54 | fields (particularly ETag) of one of the entities that matched. 55 | 56 | (Paraphrased) A server may provide an ETag header on a response. On 57 | subsequent queries, the client may reference the value of this Etag 58 | header in an If-None-Match header; on receiving such a header, the 59 | server can check whether the entity at that URL has changed from the 60 | clients last version, and if not, it can return a 304 to indicate 61 | the client can use it's current representation. 62 | """ 63 | r = sess.get(self.etag_url) 64 | 65 | # make sure we cached it 66 | assert self.cache.get(self.etag_url) == r.raw 67 | 68 | # make the same request 69 | resp = sess.get(self.etag_url) 70 | assert resp.raw == r.raw 71 | assert resp.from_cache 72 | 73 | # tell the server to change the etags of the response 74 | sess.get(self.update_etag_url) 75 | 76 | resp = sess.get(self.etag_url) 77 | assert resp != r 78 | assert not resp.from_cache 79 | 80 | # Make sure we updated our cache with the new etag'd response. 81 | assert self.cache.get(self.etag_url) == resp.raw 82 | 83 | 84 | class TestDisabledETags(object): 85 | """Test our use of ETags when the response is stale and the 86 | response has an ETag. 
87 | """ 88 | @pytest.fixture() 89 | def sess(self, server): 90 | self.etag_url = urljoin(server.application_url, '/etag') 91 | self.update_etag_url = urljoin(server.application_url, '/update_etag') 92 | self.cache = DictCache() 93 | sess = CacheControl(requests.Session(), 94 | cache=self.cache, 95 | cache_etags=False, 96 | serializer=NullSerializer()) 97 | return sess 98 | 99 | def test_expired_etags_if_none_match_response(self, sess): 100 | """Make sure an expired response that contains an ETag uses 101 | the If-None-Match header. 102 | """ 103 | # get our response 104 | r = sess.get(self.etag_url) 105 | 106 | # expire our request by changing the date. Our test endpoint 107 | # doesn't provide time base caching headers, so we add them 108 | # here in order to expire the request. 109 | r.headers['Date'] = 'Tue, 26 Nov 2012 00:50:49 GMT' 110 | self.cache.set(self.etag_url, r) 111 | 112 | r = sess.get(self.etag_url) 113 | assert r.from_cache 114 | assert 'if-none-match' in r.request.headers 115 | assert r.status_code == 200 116 | -------------------------------------------------------------------------------- /docs/etags.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | ETag Support 3 | ============== 4 | 5 | CacheControl's support of ETags is slightly different than 6 | httplib2. In httplib2, an ETag is considered when using a cached 7 | response when the cache is considered stale. When a cached response is 8 | expired and it has an ETag header, it returns a response with the 9 | appropriate `If-None-Match` header. We'll call this behavior a **Time 10 | Priority** cache as the ETag support only takes effect when the time has 11 | expired. 12 | 13 | In CacheControl the default behavior when an ETag an sent by the 14 | server is to cache the response. We'll refer to this pattern as a 15 | **Equal Priority** cache as the decision to cache is either time base or 16 | due to the presense of an ETag. 
17 | 18 | The spec is not explicit what takes priority when caching with both 19 | ETags and time based headers. Therefore, CacheControl supports the 20 | different mechanisms via configuration where possible. 21 | 22 | 23 | Turning Off Equal Priority Caching 24 | ================================== 25 | 26 | The danger in Equal Priority Caching is that a server that returns 27 | ETag headers for every request may fill up your cache. You can disable 28 | Equal Priority Caching and utilize a Time Priority algorithm like 29 | httplib2. :: 30 | 31 | import requests 32 | from cachecontrol import CacheControl 33 | 34 | sess = CacheControl(requests.Session(), cache_etags=False) 35 | 36 | This will only utilize ETags when they exist within the context of 37 | time based caching headers. If a response has time base caching 38 | headers that are valid along with an ETag, we will still attempt to 39 | handle a 304 Not Modified even though the cached value as 40 | expired. Here is a simple example. :: 41 | 42 | # Server response 43 | GET /foo.html 44 | Date: Tue, 26 Nov 2013 00:50:49 GMT 45 | Cache-Control: max-age=3000 46 | ETag: JAsUYM8K 47 | 48 | On a subsequent request, if the cache has expired, the next request 49 | will still include the `If-None-Match` header. The cached response 50 | will remain in the cache awaiting the response. :: 51 | 52 | # Client request 53 | GET /foo.html 54 | If-None-Match: JAsUYM8K 55 | 56 | If the server returns a `304 Not Modified`, it will use the stale 57 | cached value, updating the headers from the most recent request. :: 58 | 59 | # Server response 60 | GET /foo.html 61 | Date: Tue, 26 Nov 2013 01:30:19 GMT 62 | Cache-Control: max-age=3000 63 | ETag: JAsUYM8K 64 | 65 | If the server returns a `200 OK`, the cache will be updated 66 | accordingly. 
67 | 68 | 69 | Equal Priority Caching Benefits 70 | =============================== 71 | 72 | The benefits of equal priority caching is that you have two orthogonal 73 | means of introducing a cache. The time based cache provides an 74 | effective way to reduce the load on requests that can be eventually 75 | consistent. Static resource are a great example of when time based 76 | caching is effective. 77 | 78 | The ETag based cache is effective for working with documents that are 79 | larger and/or need to be correct immediately after changes. For 80 | example, if you exported some data from a large database, the file 81 | could be 10 GBs. Being able to send an ETag with this sort of request 82 | an know the version you have locally is valid saves a ton of bandwidth 83 | and time. 84 | 85 | Likewise, if you have a resource that you want to update, you can be 86 | confident there will not be a `lost update`_ because you have local 87 | version that is stale. 88 | 89 | 90 | Endpoint Specific Caching 91 | ========================= 92 | 93 | It should be pointed out that there are times when an endpoint is 94 | specifically tailored for different caching techniques. If you have a 95 | RESTful service, there might be endpoints that are specifically meant 96 | to be cached via time based caching techniques where as other 97 | endpoints should focus on using ETags. In this situation it is 98 | recommended that you use the `CacheControlAdapter` directly. :: 99 | 100 | import requests 101 | from cachecontrol import CacheControlAdapter 102 | from cachecontrol.caches import RedisCache 103 | 104 | # using django for an idea on where you might get a 105 | # username/password. 106 | from django.conf import settings 107 | 108 | # a function to return a redis connection all the instances of the 109 | # app may use. this allows updates to the API (ie PUT) to invalidate 110 | # the cache for other users. 
111 | from myapp.db import redis_connection 112 | 113 | 114 | # create our session 115 | client = sess.Session(auth=(settings.user, settings.password)) 116 | 117 | # we have a gettext like endpoint. this doesn't get updated very 118 | # often so a time based cache is a helpful way to reduce many small 119 | # requests. 120 | client.mount('http://myapi.foo.com/gettext/', 121 | CacheControlAdapter(cache_etags=False)) 122 | 123 | 124 | # here we have user profile endpoint that lets us update information 125 | # about users. we need this to be consistent immediately after a user 126 | # updates some information because another node might handle the 127 | # request. It uses the global redis cache to coordinate the cache and 128 | # uses the equal priority caching to be sure etags are used by default. 129 | redis_cache = RedisCache(redis_connection()) 130 | client.mount('http://myapi.foo.com/user_profiles/', 131 | CacheControlAdapter(cache=redis_cache)) 132 | 133 | Hopefully this more indepth example reveals how to configure a 134 | `requests.Session` to better utilize ETag based caching vs. Time 135 | Priority Caching. 136 | 137 | .. _lost update: http://www.w3.org/1999/04/Editing/ 138 | -------------------------------------------------------------------------------- /tests/test_cache_control.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests that verify our caching methods work correctly. 
3 | """ 4 | import pytest 5 | from mock import ANY, Mock 6 | import time 7 | 8 | from cachecontrol import CacheController 9 | from cachecontrol.cache import DictCache 10 | 11 | 12 | TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" 13 | 14 | 15 | class NullSerializer(object): 16 | 17 | def dumps(self, request, response): 18 | return response 19 | 20 | def loads(self, request, data): 21 | return data 22 | 23 | 24 | class TestCacheControllerResponse(object): 25 | url = 'http://url.com/' 26 | 27 | def req(self, headers=None): 28 | headers = headers or {} 29 | return Mock(full_url=self.url, # < 1.x support 30 | url=self.url, 31 | headers=headers) 32 | 33 | def resp(self, headers=None): 34 | headers = headers or {} 35 | return Mock(status=200, 36 | headers=headers, 37 | request=self.req(), 38 | read=lambda **k: b"testing") 39 | 40 | @pytest.fixture() 41 | def cc(self): 42 | # Cache controller fixture 43 | return CacheController(Mock(), serializer=Mock()) 44 | 45 | def test_no_cache_non_20x_response(self, cc): 46 | # No caching without some extra headers, so we add them 47 | now = time.strftime(TIME_FMT, time.gmtime()) 48 | resp = self.resp({'cache-control': 'max-age=3600', 49 | 'date': now}) 50 | 51 | no_cache_codes = [201, 300, 400, 500] 52 | for code in no_cache_codes: 53 | resp.status = code 54 | cc.cache_response(Mock(), resp) 55 | assert not cc.cache.set.called 56 | 57 | # this should work b/c the resp is 20x 58 | resp.status = 203 59 | cc.cache_response(self.req(), resp) 60 | assert cc.serializer.dumps.called 61 | assert cc.cache.set.called 62 | 63 | def test_no_cache_with_no_date(self, cc): 64 | # No date header which makes our max-age pointless 65 | resp = self.resp({'cache-control': 'max-age=3600'}) 66 | cc.cache_response(self.req(), resp) 67 | 68 | assert not cc.cache.set.called 69 | 70 | def test_cache_response_no_cache_control(self, cc): 71 | resp = self.resp() 72 | cc.cache_response(self.req(), resp) 73 | 74 | assert not cc.cache.set.called 75 | 76 | def 
test_cache_response_cache_max_age(self, cc): 77 | now = time.strftime(TIME_FMT, time.gmtime()) 78 | resp = self.resp({'cache-control': 'max-age=3600', 79 | 'date': now}) 80 | req = self.req() 81 | cc.cache_response(req, resp) 82 | cc.serializer.dumps.assert_called_with(req, resp, body=None) 83 | cc.cache.set.assert_called_with(self.url, ANY) 84 | 85 | def test_cache_repsonse_no_store(self): 86 | resp = Mock() 87 | cache = DictCache({self.url: resp}) 88 | cc = CacheController(cache) 89 | 90 | cache_url = cc.cache_url(self.url) 91 | 92 | resp = self.resp({'cache-control': 'no-store'}) 93 | assert cc.cache.get(cache_url) 94 | 95 | cc.cache_response(self.req(), resp) 96 | assert not cc.cache.get(cache_url) 97 | 98 | 99 | class TestCacheControlRequest(object): 100 | url = 'http://foo.com/bar' 101 | 102 | def setup(self): 103 | self.c = CacheController( 104 | DictCache(), 105 | serializer=NullSerializer(), 106 | ) 107 | 108 | def req(self, headers): 109 | return self.c.cached_request(Mock(url=self.url, headers=headers)) 110 | 111 | def test_cache_request_no_cache(self): 112 | resp = self.req({'cache-control': 'no-cache'}) 113 | assert not resp 114 | 115 | def test_cache_request_pragma_no_cache(self): 116 | resp = self.req({'pragma': 'no-cache'}) 117 | assert not resp 118 | 119 | def test_cache_request_no_store(self): 120 | resp = self.req({'cache-control': 'no-store'}) 121 | assert not resp 122 | 123 | def test_cache_request_max_age_0(self): 124 | resp = self.req({'cache-control': 'max-age=0'}) 125 | assert not resp 126 | 127 | def test_cache_request_not_in_cache(self): 128 | resp = self.req({}) 129 | assert not resp 130 | 131 | def test_cache_request_fresh_max_age(self): 132 | now = time.strftime(TIME_FMT, time.gmtime()) 133 | resp = Mock(headers={'cache-control': 'max-age=3600', 134 | 'date': now}) 135 | 136 | cache = DictCache({self.url: resp}) 137 | self.c.cache = cache 138 | r = self.req({}) 139 | assert r == resp 140 | 141 | def 
test_cache_request_unfresh_max_age(self): 142 | earlier = time.time() - 3700 # epoch - 1h01m40s 143 | now = time.strftime(TIME_FMT, time.gmtime(earlier)) 144 | resp = Mock(headers={'cache-control': 'max-age=3600', 145 | 'date': now}) 146 | self.c.cache = DictCache({self.url: resp}) 147 | r = self.req({}) 148 | assert not r 149 | 150 | def test_cache_request_fresh_expires(self): 151 | later = time.time() + 86400 # GMT + 1 day 152 | expires = time.strftime(TIME_FMT, time.gmtime(later)) 153 | now = time.strftime(TIME_FMT, time.gmtime()) 154 | resp = Mock(headers={'expires': expires, 155 | 'date': now}) 156 | cache = DictCache({self.url: resp}) 157 | self.c.cache = cache 158 | r = self.req({}) 159 | assert r == resp 160 | 161 | def test_cache_request_unfresh_expires(self): 162 | sooner = time.time() - 86400 # GMT - 1 day 163 | expires = time.strftime(TIME_FMT, time.gmtime(sooner)) 164 | now = time.strftime(TIME_FMT, time.gmtime()) 165 | resp = Mock(headers={'expires': expires, 166 | 'date': now}) 167 | cache = DictCache({self.url: resp}) 168 | self.c.cache = cache 169 | r = self.req({}) 170 | assert not r 171 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/CacheControl.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/CacheControl.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/CacheControl" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/CacheControl" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # CacheControl documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Nov 4 15:01:23 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.viewcode'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 
43 | project = u'CacheControl' 44 | copyright = u'2013, Eric Larson' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '0.6' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '0.6' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 
94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 
144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'CacheControldoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'CacheControl.tex', u'CacheControl Documentation', 187 | u'Eric Larson', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 
199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'cachecontrol', u'CacheControl Documentation', 217 | [u'Eric Larson'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'CacheControl', u'CacheControl Documentation', 231 | u'Eric Larson', 'CacheControl', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /cachecontrol/controller.py: -------------------------------------------------------------------------------- 1 | """ 2 | The httplib2 algorithms ported for use with requests. 
"""The httplib2 algorithms ported for use with requests."""
import re
import calendar
import time

from requests.structures import CaseInsensitiveDict

from .cache import DictCache
from .compat import parsedate_tz
from .serialize import Serializer


# URI splitter regex from Appendix B of RFC 3986.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])


class CacheController(object):
    """An interface to see if request should cached or not.
    """
    def __init__(self, cache=None, cache_etags=True, serializer=None):
        # cache: any object providing get/set/delete (defaults to an
        # in-memory dict cache).
        self.cache = cache or DictCache()
        # cache_etags: when True, responses carrying an ETag are cached
        # even without explicit freshness information.
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()

    def _urlnorm(self, uri):
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            # ValueError subclasses Exception, so existing callers that
            # caught the old bare Exception still work.
            raise ValueError("Only absolute URIs are allowed. uri = %s" % uri)
        authority = authority.lower()
        scheme = scheme.lower()
        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    def cache_url(self, uri):
        """Return the normalized cache key for the given absolute URI."""
        return self._urlnorm(uri)

    @staticmethod
    def _int_directive(value):
        """Parse a numeric cache directive value.

        Returns the value as an int, or None when the value is missing
        or not an integer (malformed headers must not crash caching).
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.

        Directive names are lowercased; valued directives (``name=value``)
        map to their (string) value, valueless ones map to 1.
        """
        retval = {}

        cc_header = 'Cache-Control' if 'Cache-Control' in headers else 'cache-control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")]
            parts_wo_args = [(name.strip().lower(), 1)
                             for name in parts if -1 == name.find("=")]
            retval = dict(parts_with_args + parts_wo_args)
        return retval

    def cached_request(self, request):
        """Return a fresh cached response for *request*, or False.

        False means the caller should perform the real request (possibly
        a conditional one via conditional_headers()).
        """
        cache_url = self.cache_url(request.url)
        cc = self.parse_cache_control(request.headers)

        # non-caching states
        no_cache = 'no-cache' in cc
        # NOTE: directive values are strings, so compare numerically.
        # (The old ``cc['max-age'] == 0`` never matched the string '0'.)
        if self._int_directive(cc.get('max-age')) == 0:
            no_cache = True

        # Bail out if no-cache was set
        if no_cache:
            return False

        # It is in the cache, so lets see if it is going to be
        # fresh enough
        resp = self.serializer.loads(request, self.cache.get(cache_url))

        # Check to see if we have a cached object
        if not resp:
            return False

        headers = CaseInsensitiveDict(resp.headers)

        # Without a parseable Date header (e.g. a response cached purely
        # for its ETag) we cannot compute an age; fall back to a real
        # (conditional) request instead of raising.
        if 'date' not in headers:
            return False
        parsed_date = parsedate_tz(headers['date'])
        if parsed_date is None:
            return False

        now = time.time()
        date = calendar.timegm(parsed_date)
        current_age = max(0, now - date)

        # TODO: There is an assumption that the result will be a
        # urllib3 response object. This may not be best since we
        # could probably avoid instantiating or constructing the
        # response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0
        resp_max_age = self._int_directive(resp_cc.get('max-age'))
        if resp_max_age is not None and resp_max_age >= 0:
            freshness_lifetime = resp_max_age
        elif 'expires' in headers:
            expires = parsedate_tz(headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)

        # determine if we are setting freshness limit in the req;
        # a request max-age overrides whatever the response allowed
        if 'max-age' in cc:
            freshness_lifetime = self._int_directive(cc['max-age']) or 0

        if 'min-fresh' in cc:
            min_fresh = self._int_directive(cc['min-fresh']) or 0
            # adjust our current age by our min fresh
            current_age += min_fresh

        # see how fresh we actually are
        fresh = (freshness_lifetime > current_age)

        if fresh:
            return resp

        # we're not fresh. If we don't have an Etag, clear it out;
        # with an ETag the entry can still be revalidated via a 304.
        if 'etag' not in headers:
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request):
        """Build If-None-Match / If-Modified-Since headers from the cached
        response for *request*, if any."""
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers

    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        if response.status not in [200, 203]:
            return

        response_headers = CaseInsensitiveDict(response.headers)

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)

        # Delete it from the cache if we happen to have it stored there
        no_store = cc.get('no-store') or cc_req.get('no-store')
        if no_store and self.cache.get(cache_url):
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in response_headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                # A malformed max-age previously raised ValueError here;
                # treat it as "do not cache" instead.
                max_age = self._int_directive(cc['max-age'])
                if max_age is not None and max_age > 0:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(request, self.cache.get(cache_url))

        if not cached_response:
            # we didn't have a cached response
            return response

        # did so lets update our headers
        cached_response.headers.update(response.headers)

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response