├── setup.cfg ├── .bumpversion.cfg ├── dev_requirements.txt ├── .gitignore ├── tox.ini ├── .travis.yml ├── CONTRIBUTORS.rst ├── cachecontrol ├── __init__.py ├── wrapper.py ├── caches │ ├── __init__.py │ ├── redis_cache.py │ └── file_cache.py ├── compat.py ├── cache.py ├── filewrapper.py ├── adapter.py ├── serialize.py └── controller.py ├── tests ├── test_storage_redis.py ├── test_adapter.py ├── test_storage_filecache.py ├── test_max_age.py ├── test_vary.py ├── test_etag.py └── test_cache_control.py ├── tasks.py ├── setup.py ├── README.rst ├── examples └── benchmark.py ├── docs ├── usage.rst ├── index.rst ├── tips.rst ├── storage.rst ├── etags.rst ├── Makefile └── conf.py └── conftest.py /setup.cfg: -------------------------------------------------------------------------------- 1 | [pytest] 2 | norecursedirs = bin lib include build -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.9.3 3 | files = setup.py 4 | commit = True 5 | tag = True 6 | 7 | -------------------------------------------------------------------------------- /dev_requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 
2 | 3 | tox 4 | pytest 5 | mock 6 | webtest 7 | sphinx 8 | redis 9 | lockfile 10 | bumpversion 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.pyc 3 | *.pyo 4 | *.egg-info/* 5 | dist 6 | bin 7 | lib 8 | lib64 9 | include 10 | .Python 11 | docs/_build 12 | build/ 13 | .tox -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py33 3 | 4 | [testenv] 5 | deps = pytest 6 | mock 7 | webtest 8 | redis 9 | lockfile 10 | commands = py.test -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | env: 4 | - TOXENV=py26 5 | - TOXENV=py27 6 | - TOXENV=py32 7 | - TOXENV=py33 8 | 9 | install: "pip install tox" 10 | 11 | script: tox 12 | -------------------------------------------------------------------------------- /CONTRIBUTORS.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | Contributors 3 | ============== 4 | 5 | Huge thanks to all those folks who have helped improve CacheControl! 6 | 7 | - Toby White 8 | - Ian Cordasco 9 | - Cory Benfield 10 | - Javier de la Rosa 11 | -------------------------------------------------------------------------------- /cachecontrol/__init__.py: -------------------------------------------------------------------------------- 1 | """CacheControl import Interface. 2 | 3 | Make it easy to import from cachecontrol without long namespaces. 
4 | """ 5 | from .wrapper import CacheControl 6 | from .adapter import CacheControlAdapter 7 | from .controller import CacheController 8 | -------------------------------------------------------------------------------- /tests/test_storage_redis.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | from mock import Mock 4 | from cachecontrol.caches import RedisCache 5 | 6 | 7 | class TestRedisCache(object): 8 | 9 | def setup(self): 10 | self.conn = Mock() 11 | self.cache = RedisCache(self.conn) 12 | 13 | def test_set_expiration(self): 14 | self.cache.set('foo', 'bar', expires=datetime(2014, 2, 2)) 15 | assert self.conn.setex.called 16 | -------------------------------------------------------------------------------- /cachecontrol/wrapper.py: -------------------------------------------------------------------------------- 1 | from .adapter import CacheControlAdapter 2 | from .cache import DictCache 3 | 4 | 5 | def CacheControl(sess, cache=None, cache_etags=True, serializer=None): 6 | cache = cache or DictCache() 7 | adapter = CacheControlAdapter( 8 | cache, 9 | cache_etags=cache_etags, 10 | serializer=serializer, 11 | ) 12 | sess.mount('http://', adapter) 13 | sess.mount('https://', adapter) 14 | 15 | return sess 16 | -------------------------------------------------------------------------------- /cachecontrol/caches/__init__.py: -------------------------------------------------------------------------------- 1 | from textwrap import dedent 2 | 3 | try: 4 | from .file_cache import FileCache 5 | except ImportError: 6 | notice = dedent(''' 7 | NOTE: In order to use the FileCache you must have 8 | lockfile installed. 
You can install it via pip: 9 | pip install lockfile 10 | ''') 11 | print(notice) 12 | 13 | 14 | try: 15 | import redis 16 | from .redis_cache import RedisCache 17 | except ImportError: 18 | pass 19 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from invoke import run, task 2 | 3 | 4 | VENV = 'venv' 5 | 6 | 7 | def env_do(tail, **kw): 8 | return run('%s/bin/%s' % (VENV, tail), **kw) 9 | 10 | 11 | @task 12 | def virtualenv(): 13 | run('virtualenv %s' % VENV) 14 | 15 | 16 | @task('virtualenv') 17 | def bootstrap(): 18 | env_do('pip install -r dev_requirements.txt') 19 | 20 | 21 | @task 22 | def clean_env(): 23 | run('rm -r venv') 24 | 25 | 26 | @task 27 | def test_all(): 28 | env_do('tox') 29 | 30 | 31 | @task 32 | def docs(): 33 | run('cd docs && make html') 34 | 35 | 36 | @task 37 | def release(part): 38 | env_do('bumpversion %s' % part) 39 | run('git push origin master') 40 | run('git push --tags origin master') 41 | env_do('python setup.py sdist upload') 42 | -------------------------------------------------------------------------------- /cachecontrol/compat.py: -------------------------------------------------------------------------------- 1 | try: 2 | from urllib.parse import urljoin 3 | except ImportError: 4 | from urlparse import urljoin 5 | 6 | 7 | try: 8 | import email.utils 9 | parsedate_tz = email.utils.parsedate_tz 10 | except ImportError: 11 | import email.Utils 12 | parsedate_tz = email.Utils.parsedate_tz 13 | 14 | 15 | try: 16 | import cPickle as pickle 17 | except ImportError: 18 | import pickle 19 | 20 | 21 | # Handle the case where the requests has been patched to not have urllib3 22 | # bundled as part of it's source. 
23 | try: 24 | from requests.packages.urllib3.response import HTTPResponse 25 | except ImportError: 26 | from urllib3.response import HTTPResponse 27 | 28 | try: 29 | from requests.packages.urllib3.util import is_fp_closed 30 | except ImportError: 31 | from urllib3.util import is_fp_closed 32 | -------------------------------------------------------------------------------- /cachecontrol/cache.py: -------------------------------------------------------------------------------- 1 | """ 2 | The cache object API for implementing caches. The default is just a 3 | dictionary, which in turns means it is not threadsafe for writing. 4 | """ 5 | from threading import Lock 6 | 7 | 8 | class BaseCache(object): 9 | 10 | def get(self, key): 11 | raise NotImplemented() 12 | 13 | def set(self, key, value): 14 | raise NotImplemented() 15 | 16 | def delete(self, key): 17 | raise NotImplemented() 18 | 19 | 20 | class DictCache(BaseCache): 21 | 22 | def __init__(self, init_dict=None): 23 | self.lock = Lock() 24 | self.data = init_dict or {} 25 | 26 | def get(self, key): 27 | return self.data.get(key, None) 28 | 29 | def set(self, key, value): 30 | with self.lock: 31 | self.data.update({key: value}) 32 | 33 | def delete(self, key): 34 | with self.lock: 35 | if key in self.data: 36 | self.data.pop(key) 37 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | long_description = open('README.rst').read() 4 | 5 | VERSION = '0.9.3' 6 | 7 | setup_params = dict( 8 | name='CacheControl', 9 | version=VERSION, 10 | author='Eric Larson', 11 | author_email='eric@ionrock.org', 12 | license='MIT', 13 | url='https://github.com/ionrock/cachecontrol', 14 | keywords='requests http caching web', 15 | packages=setuptools.find_packages(), 16 | description='httplib2 caching for requests', 17 | long_description=long_description, 18 | install_requires=[ 19 | 
'requests', 20 | ], 21 | classifiers=[ 22 | 'Development Status :: 4 - Beta', 23 | 'Environment :: Web Environment', 24 | 'License :: OSI Approved :: MIT License', 25 | 'Operating System :: OS Independent', 26 | 'Programming Language :: Python', 27 | 'Topic :: Internet :: WWW/HTTP', 28 | ], 29 | ) 30 | 31 | 32 | if __name__ == '__main__': 33 | setuptools.setup(**setup_params) 34 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | CacheControl 3 | ============== 4 | 5 | CacheControl is a port of the caching algorithms in httplib2_ for use with 6 | requests_ session object. 7 | 8 | It was written because httplib2's better support for caching is often 9 | mitigated by its lack of threadsafety. The same is true of requests in 10 | terms of caching. 11 | 12 | 13 | Quickstart 14 | ========== 15 | 16 | :: 17 | 18 | import requests 19 | 20 | from cachecontrol import CacheControl 21 | 22 | 23 | sess = requests.session() 24 | cached_sess = CacheControl(sess) 25 | 26 | response = cached_sess.get('http://google.com') 27 | 28 | If the URL contains any caching based headers, it will cache the 29 | result in a simple dictionary. 30 | 31 | For more info, check out the docs_ 32 | 33 | .. image:: https://travis-ci.org/ionrock/cachecontrol.png?branch=master 34 | :target: https://travis-ci.org/ionrock/cachecontrol 35 | 36 | .. _docs: http://cachecontrol.readthedocs.org/en/latest/ 37 | .. _httplib2: http://code.google.com/p/httplib2/ 38 | .. 
_requests: http://docs.python-requests.org/ 39 | -------------------------------------------------------------------------------- /cachecontrol/filewrapper.py: -------------------------------------------------------------------------------- 1 | from .compat import is_fp_closed 2 | 3 | 4 | class CallbackFileWrapper(object): 5 | """ 6 | Small wrapper around a fp object which will tee everything read into a 7 | buffer, and when that file is closed it will execute a callback with the 8 | contents of that buffer. 9 | 10 | All attributes are proxied to the underlying file object. 11 | 12 | This class uses members with a double underscore (__) leading prefix so as 13 | not to accidentally shadow an attribute. 14 | """ 15 | 16 | def __init__(self, fp, callback): 17 | self.__buf = b"" 18 | self.__fp = fp 19 | self.__callback = callback 20 | 21 | def __getattr__(self, name): 22 | return getattr(self.__fp, name) 23 | 24 | def read(self, amt=None): 25 | data = self.__fp.read(amt) 26 | self.__buf += data 27 | 28 | # Is this the best way to figure out if the file has been completely 29 | # consumed? 
30 | if is_fp_closed(self.__fp): 31 | self.__callback(self.__buf) 32 | 33 | return data 34 | -------------------------------------------------------------------------------- /cachecontrol/caches/redis_cache.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from datetime import datetime 4 | 5 | 6 | def total_seconds(td): 7 | """Python 2.6 compatability""" 8 | if hasattr(td, 'total_seconds'): 9 | return td.total_seconds() 10 | 11 | ms = td.microseconds 12 | secs = (td.seconds + td.days * 24 * 3600) 13 | return (ms + secs * 10**6) / 10**6 14 | 15 | 16 | class RedisCache(object): 17 | 18 | def __init__(self, conn): 19 | self.conn = conn 20 | 21 | def get(self, key): 22 | return self.conn.get(key) 23 | 24 | def set(self, key, value, expires=None): 25 | if not expires: 26 | self.conn.set(key, value) 27 | else: 28 | expires = expires - datetime.now() 29 | self.conn.setex(key, total_seconds(expires), value) 30 | 31 | def delete(self, key): 32 | self.conn.delete(key) 33 | 34 | def clear(self): 35 | """Helper for clearing all the keys in a database. 
Use with 36 | caution!""" 37 | for key in self.conn.keys(): 38 | self.conn.delete(key) 39 | -------------------------------------------------------------------------------- /tests/test_adapter.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from requests import Session 4 | from cachecontrol.adapter import CacheControlAdapter 5 | from cachecontrol.wrapper import CacheControl 6 | 7 | 8 | def use_wrapper(): 9 | print('Using helper') 10 | sess = CacheControl(Session()) 11 | return sess 12 | 13 | 14 | def use_adapter(): 15 | print('Using adapter') 16 | sess = Session() 17 | sess.mount('http://', CacheControlAdapter()) 18 | return sess 19 | 20 | 21 | @pytest.fixture(params=[use_adapter, use_wrapper]) 22 | def sess(url, request): 23 | sess = request.param() 24 | sess.get(url) 25 | return sess 26 | 27 | 28 | class TestSessionActions(object): 29 | 30 | def test_get_caches(self, url, sess): 31 | r2 = sess.get(url) 32 | assert r2.from_cache is True 33 | 34 | def test_get_with_no_cache_does_not_cache(self, url, sess): 35 | r2 = sess.get(url, headers={'Cache-Control': 'no-cache'}) 36 | assert not r2.from_cache 37 | 38 | def test_put_invalidates_cache(self, url, sess): 39 | r2 = sess.put(url, data={'foo': 'bar'}) 40 | sess.get(url) 41 | assert not r2.from_cache 42 | 43 | def test_delete_invalidates_cache(self, url, sess): 44 | r2 = sess.delete(url) 45 | sess.get(url) 46 | assert not r2.from_cache 47 | -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import requests 3 | import argparse 4 | 5 | from multiprocessing import Process 6 | from datetime import datetime 7 | from wsgiref.simple_server import make_server 8 | from cachecontrol import CacheControl 9 | 10 | HOST = 'localhost' 11 | PORT = 8050 12 | URL = 'http://{0}:{1}/'.format(HOST, PORT) 13 | 14 | 15 | class 
Server(object): 16 | 17 | def __call__(self, env, sr): 18 | body = 'Hello World!' 19 | status = '200 OK' 20 | headers = [ 21 | ('Cache-Control', 'max-age=%i' % (60 * 10)), 22 | ('Content-Type', 'text/plain'), 23 | ] 24 | sr(status, headers) 25 | return body 26 | 27 | 28 | def start_server(): 29 | httpd = make_server(HOST, PORT, Server()) 30 | httpd.serve_forever() 31 | 32 | 33 | def run_benchmark(sess): 34 | proc = Process(target=start_server) 35 | proc.start() 36 | 37 | start = datetime.now() 38 | for i in xrange(0, 1000): 39 | sess.get(URL) 40 | sys.stdout.write('.') 41 | end = datetime.now() 42 | print() 43 | 44 | total = end - start 45 | print('Total time for 1000 requests: %s' % total) 46 | proc.terminate() 47 | 48 | 49 | def run(): 50 | parser = argparse.ArgumentParser() 51 | parser.add_argument('-n', '--no-cache', 52 | default=False, 53 | action='store_true', 54 | help='Do not use cachecontrol') 55 | args = parser.parse_args() 56 | 57 | sess = requests.Session() 58 | if not args.no_cache: 59 | sess = CacheControl(sess) 60 | 61 | run_benchmark(sess) 62 | 63 | 64 | if __name__ == '__main__': 65 | run() 66 | -------------------------------------------------------------------------------- /tests/test_storage_filecache.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests that verify FileCache storage works correctly. 
3 | """ 4 | 5 | import string 6 | 7 | from random import randint, sample 8 | 9 | import pytest 10 | import requests 11 | from cachecontrol import CacheControl 12 | from cachecontrol.caches import FileCache 13 | 14 | STORAGE_FOLDER = ".cache" 15 | 16 | 17 | def randomdata(): 18 | """Plain random http data generator:""" 19 | key = ''.join(sample(string.ascii_lowercase, randint(2, 4))) 20 | val = ''.join(sample(string.ascii_lowercase + string.digits, 21 | randint(2, 10))) 22 | return '&{0}={1}'.format(key, val) 23 | 24 | 25 | class TestStorageFileCache(object): 26 | 27 | @pytest.fixture() 28 | def sess(self, server): 29 | self.url = server.application_url 30 | self.cache = FileCache(STORAGE_FOLDER) 31 | sess = CacheControl(requests.Session(), cache=self.cache) 32 | return sess 33 | 34 | def test_filecache_from_cache(self, sess): 35 | response = sess.get(self.url) 36 | assert not response.from_cache 37 | response = sess.get(self.url) 38 | assert response.from_cache 39 | 40 | def test_key_length(self, sess): 41 | """ 42 | Hash table keys: 43 | Most file systems have a 255 characters path limitation. 44 | * Make sure hash method does not produce too long keys 45 | * Ideally hash method generate fixed length keys 46 | """ 47 | url0 = url1 = 'http://example.org/res?a=1' 48 | while len(url0) < 255: 49 | url0 += randomdata() 50 | url1 += randomdata() 51 | assert len(self.cache.encode(url0)) < 200 52 | assert len(self.cache.encode(url0)) == len(self.cache.encode(url1)) 53 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Using CacheControl 3 | ==================== 4 | 5 | CacheControl assumes you are using a `requests.Session` for your 6 | requests. If you are making ad-hoc requests using `requests.get` then 7 | you probably are not terribly concerned about caching. 
8 | 9 | There are two way to use CacheControl, via the wrapper and the 10 | adapter. 11 | 12 | 13 | Wrapper 14 | ======= 15 | 16 | The easiest way to use CacheControl is to utilize the basic 17 | wrapper. Here is an example: :: 18 | 19 | import requests 20 | import cachecontrol 21 | 22 | sess = cachecontrol.CacheControl(requests.Session()) 23 | resp = sess.get('http://google.com') 24 | 25 | This uses the default cache store, a thread safe in-memory dictionary. 26 | 27 | 28 | Adapter 29 | ======= 30 | 31 | The other way to use CacheControl is via a requests `Transport 32 | Adapter`_. 33 | 34 | Here is how the adapter works: :: 35 | 36 | import requests 37 | import cachecontrol 38 | 39 | sess = requests.Session() 40 | sess.mount('http://', CacheControlAdapter()) 41 | 42 | resp = sess.get('http://google.com') 43 | 44 | 45 | Under the hood, the wrapper method of using CacheControl mentioned 46 | above is the same as this example. 47 | 48 | 49 | Use a Different Cache Store 50 | =========================== 51 | 52 | Both the wrapper and adapter classes allow providing a custom cache 53 | store object for storing your cached data. Here is an example using 54 | the provided `FileCache` from CacheControl: :: 55 | 56 | import requests 57 | 58 | from cachecontrol import CacheControl 59 | 60 | # NOTE: This requires lockfile be installed 61 | from cachecontrol.caches import FileCache 62 | 63 | sess = CacheControl(requests.Session(), 64 | cache=FileCache('.webcache')) 65 | 66 | 67 | The `FileCache` will create a directory called `.webcache` and store a 68 | file for each cached request. 69 | 70 | 71 | 72 | .. 
_Transport Adapter: http://docs.python-requests.org/en/latest/user/advanced/#transport-adapters 73 | -------------------------------------------------------------------------------- /tests/test_max_age.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import pytest 3 | 4 | from requests import Session 5 | from cachecontrol.adapter import CacheControlAdapter 6 | from cachecontrol.cache import DictCache 7 | 8 | 9 | class NullSerializer(object): 10 | 11 | def dumps(self, request, response, body=None): 12 | return response 13 | 14 | def loads(self, request, data): 15 | return data 16 | 17 | 18 | class TestMaxAge(object): 19 | 20 | @pytest.fixture() 21 | def sess(self, server): 22 | self.url = server.application_url 23 | self.cache = DictCache() 24 | sess = Session() 25 | sess.mount( 26 | 'http://', 27 | CacheControlAdapter(self.cache, serializer=NullSerializer()), 28 | ) 29 | return sess 30 | 31 | def test_client_max_age_0(self, sess): 32 | """ 33 | Making sure when the client uses max-age=0 we don't get a 34 | cached copy even though we're still fresh. 35 | """ 36 | print('first request') 37 | r = sess.get(self.url) 38 | assert self.cache.get(self.url) == r.raw 39 | 40 | print('second request') 41 | r = sess.get(self.url, headers={'Cache-Control': 'max-age=0'}) 42 | 43 | # don't remove from the cache 44 | assert self.cache.get(self.url) 45 | assert not r.from_cache 46 | 47 | def test_client_max_age_3600(self, sess): 48 | """ 49 | Verify we get a cached value when the client has a 50 | reasonable max-age value. 51 | """ 52 | r = sess.get(self.url) 53 | assert self.cache.get(self.url) == r.raw 54 | 55 | # request that we don't want a new one unless 56 | r = sess.get(self.url, headers={'Cache-Control': 'max-age=3600'}) 57 | assert r.from_cache is True 58 | 59 | # now lets grab one that forces a new request b/c the cache 60 | # has expired. To do that we'll inject a new time value. 
61 | resp = self.cache.get(self.url) 62 | resp.headers['date'] = 'Tue, 15 Nov 1994 08:12:31 GMT' 63 | r = sess.get(self.url) 64 | assert not r.from_cache 65 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. CacheControl documentation master file, created by 2 | sphinx-quickstart on Mon Nov 4 15:01:23 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to CacheControl's documentation! 7 | ======================================== 8 | 9 | CacheControl is a port of the caching algorithms in httplib2_ for use with 10 | requests_ session object. 11 | 12 | It was written because httplib2's better support for caching is often 13 | mitigated by its lack of threadsafety. The same is true of requests in 14 | terms of caching. 15 | 16 | 17 | Install 18 | ======= 19 | 20 | CacheControl is available from PyPI_. You can install it with pip_ :: 21 | 22 | $ pip install CacheControl 23 | 24 | Some of the included cache storage classes have external 25 | requirements. See :doc:`storage` for more info. 26 | 27 | 28 | 29 | Quick Start 30 | =========== 31 | 32 | For the impatient, here is how to get started using CacheControl :: 33 | 34 | import requests 35 | 36 | from cachecontrol import CacheControl 37 | 38 | 39 | sess = requests.session() 40 | cached_sess = CacheControl(sess) 41 | 42 | response = cached_sess.get('http://google.com') 43 | 44 | 45 | This uses a threadsafe in memory dictionary for storage. 46 | 47 | 48 | Tests 49 | ===== 50 | 51 | The tests are all in cachecontrol/tests and is runnable by py.test. 52 | 53 | 54 | Disclaimers 55 | =========== 56 | 57 | CacheControl is relatively new and maybe have bugs. 
I have made an 58 | effort to faithfully port the tests from httplib2 to CacheControl, but 59 | there is a decent chance that I've missed something. Please file bugs 60 | if you find any issues! 61 | 62 | With that in mind, CacheControl has been used sucessfully in a 63 | production environments, replacing httplib2's usage. 64 | 65 | If you give it a try, please let me know of any issues. 66 | 67 | 68 | .. _httplib2: http://code.google.com/p/httplib2/ 69 | .. _requests: http://docs.python-requests.org/ 70 | .. _Editing the Web: http://www.w3.org/1999/04/Editing/ 71 | .. _PyPI: https://pypi.python.org/pypi/CacheControl/ 72 | .. _pip: http://www.pip-installer.org/ 73 | 74 | 75 | Contents: 76 | 77 | .. toctree:: 78 | :maxdepth: 2 79 | 80 | usage 81 | storage 82 | etags 83 | tips 84 | 85 | 86 | 87 | Indices and tables 88 | ================== 89 | 90 | * :ref:`genindex` 91 | * :ref:`modindex` 92 | * :ref:`search` 93 | -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | from pprint import pformat 2 | 3 | import pytest 4 | 5 | from webtest.http import StopableWSGIServer 6 | 7 | 8 | class SimpleApp(object): 9 | 10 | def __init__(self): 11 | self.etag_count = 0 12 | self.update_etag_string() 13 | 14 | def dispatch(self, env): 15 | path = env['PATH_INFO'][1:].split('/') 16 | segment = path.pop(0) 17 | if segment and hasattr(self, segment): 18 | return getattr(self, segment) 19 | return None 20 | 21 | def vary_accept(self, env, start_response): 22 | headers = [ 23 | ('Cache-Control', 'max-age=5000'), 24 | ('Content-Type', 'text/plain'), 25 | ('Vary', 'Accept-Encoding, Accept'), 26 | ] 27 | start_response('200 OK', headers) 28 | return [pformat(env).encode("utf8")] 29 | 30 | def update_etag_string(self): 31 | self.etag_count += 1 32 | self.etag_string = '"ETAG-{0}"'.format(self.etag_count) 33 | 34 | def update_etag(self, env, start_response): 35 | 
self.update_etag_string() 36 | headers = [ 37 | ('Cache-Control', 'max-age=5000'), 38 | ('Content-Type', 'text/plain'), 39 | ] 40 | start_response('200 OK', headers) 41 | return [pformat(env).encode("utf8")] 42 | 43 | def etag(self, env, start_response): 44 | headers = [ 45 | ('Etag', self.etag_string), 46 | ] 47 | if env.get('HTTP_IF_NONE_MATCH') == self.etag_string: 48 | start_response('304 Not Modified', headers) 49 | else: 50 | start_response('200 OK', headers) 51 | return [pformat(env).encode("utf8")] 52 | 53 | def __call__(self, env, start_response): 54 | func = self.dispatch(env) 55 | 56 | if func: 57 | return func(env, start_response) 58 | 59 | headers = [ 60 | ('Cache-Control', 'max-age=5000'), 61 | ('Content-Type', 'text/plain'), 62 | ] 63 | start_response('200 OK', headers) 64 | return [pformat(env).encode("utf8")] 65 | 66 | 67 | @pytest.fixture(scope='session') 68 | def server(): 69 | return pytest.server 70 | 71 | 72 | @pytest.fixture() 73 | def url(server): 74 | return server.application_url 75 | 76 | 77 | def pytest_namespace(): 78 | return dict(server=StopableWSGIServer.create(SimpleApp())) 79 | 80 | 81 | def pytest_unconfigure(config): 82 | pytest.server.shutdown() 83 | -------------------------------------------------------------------------------- /docs/tips.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | Tips and Best Practices 3 | ========================= 4 | 5 | Caching is hard! It is considered one of the great challenges of 6 | computer science. Fortunatley, the HTTP spec helps to navigate some 7 | pitfalls of invalidation using stale responses. Below are some 8 | suggestions and best practices to help avoid the more subtle issues 9 | that can crop up using CacheControl and HTTP caching. 10 | 11 | If you have a suggetions please create a new issue in `github 12 | `_ and let folks know 13 | what you ran into and how you fixed it. 
14 | 15 | 16 | Timezones 17 | ========= 18 | 19 | It is important to remember that the times reported by a server may or 20 | may not be timezone aware. If you are using CacheControl with a 21 | service you control, make sure any timestamps are used consistently, 22 | especially if requests might cross timezones. 23 | 24 | 25 | Cached Responses 26 | ================ 27 | 28 | We've done our best to make sure cached responses act like a normal 29 | response, but there are aspects that are different for somewhat 30 | obvious reasons. 31 | 32 | - Cached responses are never streaming 33 | - Cached repsonses have `None` for the `raw` attribute 34 | 35 | Obviously, when you cache a response, you have downloaded the entire 36 | body. Therefore, there is never a use case for streaming a cached 37 | response. 38 | 39 | With that in mind, you should be aware that if you try to cache a very 40 | large response on a network store, you still might have some latency 41 | tranferring the data from the network store to your 42 | application. Another consideration is storing large responses in a 43 | `FileCache`. If you are caching using ETags and the server is 44 | extremely specific as to what constitutes an equivalent request, it 45 | could provide many different responses for essentially the same data 46 | within the context of your application. 47 | 48 | 49 | Query String Params 50 | =================== 51 | 52 | If you are caching requests that use a large number of query string 53 | parameters, consider sorting them to ensure that the request is 54 | properly cached. 55 | 56 | Requests supports passing both dictionaries and lists of tuples as the 57 | param argument in a request. For example: :: 58 | 59 | requests.get(url, params=sorted([('foo', 'one'), ('bar', 'two')])) 60 | 61 | By ordering your params, you can be sure the cache key will be 62 | consistent across requests and you are caching effectively. 
63 | -------------------------------------------------------------------------------- /cachecontrol/caches/file_cache.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | 4 | from lockfile import FileLock 5 | 6 | 7 | def _secure_open_write(filename, fmode): 8 | # We only want to write to this file, so open it in write only mode 9 | flags = os.O_WRONLY 10 | 11 | # os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only 12 | # will open *new* files. 13 | # We specify this because we want to ensure that the mode we pass is the 14 | # mode of the file. 15 | flags |= os.O_CREAT | os.O_EXCL 16 | 17 | # Do not follow symlinks to prevent someone from making a symlink that 18 | # we follow and insecurely open a cache file. 19 | if hasattr(os, "O_NOFOLLOW"): 20 | flags |= os.O_NOFOLLOW 21 | 22 | # On Windows we'll mark this file as binary 23 | if hasattr(os, "O_BINARY"): 24 | flags |= os.O_BINARY 25 | 26 | # Before we open our file, we want to delete any existing file that is 27 | # there 28 | try: 29 | os.remove(filename) 30 | except (IOError, OSError): 31 | # The file must not exist already, so we can just skip ahead to opening 32 | pass 33 | 34 | # Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a 35 | # race condition happens between the os.remove and this line, that an 36 | # error will be raised. Because we utilize a lockfile this should only 37 | # happen if someone is attempting to attack us. 
38 | fd = os.open(filename, flags, fmode) 39 | try: 40 | return os.fdopen(fd, "wb") 41 | except: 42 | # An error occurred wrapping our FD in a file object 43 | os.close(fd) 44 | raise 45 | 46 | 47 | class FileCache(object): 48 | def __init__(self, directory, forever=False, filemode=0o0600, 49 | dirmode=0o0700): 50 | self.directory = directory 51 | self.forever = forever 52 | self.filemode = filemode 53 | 54 | if not os.path.isdir(self.directory): 55 | os.makedirs(self.directory, dirmode) 56 | 57 | @staticmethod 58 | def encode(x): 59 | return hashlib.sha224(x.encode()).hexdigest() 60 | 61 | def _fn(self, name): 62 | return os.path.join(self.directory, self.encode(name)) 63 | 64 | def get(self, key): 65 | name = self._fn(key) 66 | if not os.path.exists(name): 67 | return None 68 | 69 | with open(name, 'rb') as fh: 70 | return fh.read() 71 | 72 | def set(self, key, value): 73 | name = self._fn(key) 74 | with FileLock(name) as lock: 75 | with _secure_open_write(lock.path, self.filemode) as fh: 76 | fh.write(value) 77 | 78 | def delete(self, key): 79 | name = self._fn(key) 80 | if not self.forever: 81 | os.remove(name) 82 | -------------------------------------------------------------------------------- /tests/test_vary.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import requests 3 | 4 | from cachecontrol import CacheControl 5 | from cachecontrol.cache import DictCache 6 | from cachecontrol.compat import urljoin 7 | 8 | 9 | class TestVary(object): 10 | 11 | @pytest.fixture() 12 | def sess(self, server): 13 | self.url = urljoin(server.application_url, '/vary_accept') 14 | self.cache = DictCache() 15 | sess = CacheControl(requests.Session(), cache=self.cache) 16 | return sess 17 | 18 | def cached_equal(self, cached, resp): 19 | checks = [ 20 | cached._fp.getvalue() == resp.content, 21 | cached.headers == resp.raw.headers, 22 | cached.status == resp.raw.status, 23 | cached.version == resp.raw.version, 24 | 
cached.reason == resp.raw.reason, 25 | cached.strict == resp.raw.strict, 26 | cached.decode_content == resp.raw.decode_content, 27 | ] 28 | return all(checks) 29 | 30 | def test_vary_example(self, sess): 31 | """RFC 2616 13.6 32 | 33 | When the cache receives a subsequent request whose Request-URI 34 | specifies one or more cache entries including a Vary header field, 35 | the cache MUST NOT use such a cache entry to construct a response 36 | to the new request unless all of the selecting request-headers 37 | present in the new request match the corresponding stored 38 | request-headers in the original request. 39 | 40 | Or, in simpler terms, when you make a request and the server 41 | returns defines a Vary header, unless all the headers listed 42 | in the Vary header are the same, it won't use the cached 43 | value. 44 | """ 45 | s = sess.adapters["http://"].controller.serializer 46 | r = sess.get(self.url) 47 | c = s.loads(r.request, self.cache.get(self.url)) 48 | 49 | # make sure we cached it 50 | assert self.cached_equal(c, r) 51 | 52 | # make the same request 53 | resp = sess.get(self.url) 54 | assert self.cached_equal(c, resp) 55 | assert resp.from_cache 56 | 57 | # make a similar request, changing the accept header 58 | resp = sess.get(self.url, headers={'Accept': 'text/plain, text/html'}) 59 | assert not self.cached_equal(c, resp) 60 | assert not resp.from_cache 61 | 62 | # Just confirming two things here: 63 | # 64 | # 1) The server used the vary header 65 | # 2) We have more than one header we vary on 66 | # 67 | # The reason for this is that when we don't specify the header 68 | # in the request, it is considered the same in terms of 69 | # whether or not to use the cached value. 
70 | assert 'vary' in r.headers 71 | assert len(r.headers['vary'].replace(' ', '').split(',')) == 2 72 | -------------------------------------------------------------------------------- /docs/storage.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Storing Cache Data 3 | ==================== 4 | 5 | CacheControl comes with a few storage backends for storing your 6 | cache'd objects. 7 | 8 | 9 | DictCache 10 | ========= 11 | 12 | The `DictCache` is the default cache used when no other is 13 | provided. It is a simple threadsafe dictionary. It doesn't try to do 14 | anything smart about deadlocks or forcing a busted cache, but it 15 | should be reasonably safe to use. 16 | 17 | Also, the `DictCache` does not transform the request or response 18 | objects in anyway. Therefore it is unlikely you could persist the 19 | entire cache to disk. The converse is that it should be very fast. 20 | 21 | 22 | FileCache 23 | ========= 24 | 25 | The `FileCache` is similar to the caching mechanism provided by 26 | httplib2_. It requires `lockfile`_ be installed as it prevents 27 | multiple threads from writing to the same file at the same time. 28 | 29 | Here is an example using the `FileCache`: :: 30 | 31 | import requests 32 | from cachecontrol import CacheControl 33 | from cachecontrol.caches import FileCache 34 | 35 | sess = CacheControl(requests.Session(), 36 | cache=FileCache('.web_cache')) 37 | 38 | 39 | The `FileCache` supports a `forever` flag that disables deleting from 40 | the cache. This can be helpful in debugging applications that make 41 | many web requests that you don't want to repeat. It also can be 42 | helpful in testing. 
Here is an example of how to use it: :: 43 | 44 | forever_cache = FileCache('.web_cache', forever=True) 45 | sess = CacheControl(requests.Session(), forever_cache) 46 | 47 | 48 | :A Note About Pickle: 49 | 50 | It should be noted that the `FileCache` uses pickle to store the 51 | cached response. Prior to `requests 2.1`_, `requests.Response` 52 | objects were not 'pickleable' due to the use of `IOBase` base 53 | classes in `urllib3` `HTTPResponse` objects. In CacheControl we work 54 | around this by patching the Response objects with the appropriate 55 | `__getstate__` and `__setstate__` methods when the requests version 56 | doesn't natively support Response pickling. 57 | 58 | 59 | 60 | RedisCache 61 | ========== 62 | 63 | The `RedisCache` uses a Redis database to store values. The values are 64 | stored as strings in redis, which means the get, set and delete 65 | actions are used. 66 | 67 | The `RedisCache` also provides a clear method to delete all keys in a 68 | database. Obviously, this should be used with caution as it is naive 69 | and works iteratively, looping over each key and deleting it. 70 | 71 | Here is an example using a `RedisCache`: :: 72 | 73 | import redis 74 | import requests 75 | from cachecontrol import CacheControl 76 | from cachecontrol.caches import RedisCache 77 | 78 | 79 | pool = redis.ConnectionPool(host='localhost', port=6379, db=0) 80 | r = redis.Redis(connection_pool=pool) 81 | sess = CacheControl(requests.Session(), RedisCache(r)) 82 | 83 | This is primarily a proof of concept, so please file bugs if there is 84 | a better method for utilizing redis as a cache. 85 | 86 | 87 | .. _httplib2: http://code.google.com/p/httplib2/ 88 | .. _lockfile: https://github.com/smontanaro/pylockfile 89 | .. 
_requests 2.1: http://docs.python-requests.org/en/latest/community/updates/#id2 90 | -------------------------------------------------------------------------------- /cachecontrol/adapter.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | from requests.adapters import HTTPAdapter 4 | 5 | from .controller import CacheController 6 | from .cache import DictCache 7 | from .filewrapper import CallbackFileWrapper 8 | 9 | 10 | class CacheControlAdapter(HTTPAdapter): 11 | invalidating_methods = set(['PUT', 'DELETE']) 12 | 13 | def __init__(self, cache=None, cache_etags=True, controller_class=None, 14 | serializer=None, *args, **kw): 15 | super(CacheControlAdapter, self).__init__(*args, **kw) 16 | self.cache = cache or DictCache() 17 | 18 | controller_factory = controller_class or CacheController 19 | self.controller = controller_factory( 20 | self.cache, 21 | cache_etags=cache_etags, 22 | serializer=serializer, 23 | ) 24 | 25 | def send(self, request, **kw): 26 | """ 27 | Send a request. Use the request information to see if it 28 | exists in the cache and cache the response if we need to and can. 29 | """ 30 | if request.method == 'GET': 31 | cached_response = self.controller.cached_request(request) 32 | if cached_response: 33 | return self.build_response(request, cached_response, from_cache=True) 34 | 35 | # check for etags and add headers if appropriate 36 | request.headers.update(self.controller.conditional_headers(request)) 37 | 38 | resp = super(CacheControlAdapter, self).send(request, **kw) 39 | 40 | return resp 41 | 42 | def build_response(self, request, response, from_cache=False): 43 | """ 44 | Build a response by making a request or using the cache. 45 | 46 | This will end up calling send and returning a potentially 47 | cached response 48 | """ 49 | if not from_cache and request.method == 'GET': 50 | if response.status == 304: 51 | # We must have sent an ETag request. 
import io

from requests.structures import CaseInsensitiveDict

from .compat import HTTPResponse, pickle


class Serializer(object):
    """(De)serialize cached urllib3 responses.

    The wire format is a small version prefix (``cc=N,``) followed by a
    pickled dict holding the raw response fields plus the request header
    values named by the response's ``Vary`` header, so ``loads`` can
    refuse a cached entry whose varied headers don't match a new request.
    """

    def dumps(self, request, response, body=None):
        # Case-insensitive view used for the Vary lookup below.
        response_headers = CaseInsensitiveDict(response.headers)

        if body is None:
            # Consume the raw (undecoded) body and replace the response's
            # file object so callers can still read it afterwards.
            body = response.read(decode_content=False)
            response._fp = io.BytesIO(body)

        data = {
            "response": {
                "body": body,
                "headers": response.headers,
                "status": response.status,
                "version": response.version,
                "reason": response.reason,
                "strict": response.strict,
                "decode_content": response.decode_content,
            },
        }

        # Record the request header values this response varies on, so a
        # later request must present the same values to get a cache hit.
        data["vary"] = {}
        if "vary" in response_headers:
            varied_headers = response_headers['vary'].split(',')
            for header in varied_headers:
                header = header.strip()
                data["vary"][header] = request.headers.get(header, None)

        return b"cc=1," + pickle.dumps(data, pickle.HIGHEST_PROTOCOL)

    def loads(self, request, data):
        """Deserialize *data* for *request*; return ``None`` on any miss."""
        # Short circuit if we've been given an empty set of data
        if not data:
            return

        # Determine what version of the serializer the data was
        # serialized with.
        try:
            ver, data = data.split(b",", 1)
        except ValueError:
            ver = b"cc=0"

        # Make sure that our "ver" is actually a version and isn't a
        # false positive from a , being in the data stream.
        if ver[:3] != b"cc=":
            data = ver + data
            ver = b"cc=0"

        # Get the version number out of the cc=N
        ver = ver.split(b"=", 1)[-1].decode("ascii")

        # Dispatch to the actual load method for the given version
        try:
            return getattr(self, "_loads_v{0}".format(ver))(request, data)
        except AttributeError:
            # This is a version we don't have a loads function for, so
            # we'll just treat it as a miss and return None.
            return

    def _loads_v0(self, request, data):
        # The original legacy cache data. This doesn't contain enough
        # information to construct everything we need, so we'll treat
        # this as a miss.
        return

    def _loads_v1(self, request, data):
        try:
            cached = pickle.loads(data)
        except (ValueError, EOFError, IndexError, ImportError,
                AttributeError, pickle.UnpicklingError):
            # pickle.loads can raise any of these on truncated or
            # otherwise corrupt data (see the pickle docs).  A bad cache
            # entry should be a cache miss, not a crash.
            return

        # Special case the '*' Vary value as it means we cannot actually
        # determine if the cached response is suitable for this request.
        if "*" in cached.get("vary", {}):
            return

        # Ensure that the Vary headers for the cached response match our
        # request.
        for header, value in cached.get("vary", {}).items():
            if request.headers.get(header, None) != value:
                return

        body = io.BytesIO(cached["response"].pop("body"))
        return HTTPResponse(
            body=body,
            preload_content=False,
            **cached["response"]
        )
The purpose of this feature is to 45 | allow efficient updates of cached information with a minimum amount 46 | of transaction overhead 47 | 48 | If any of the entity tags match the entity tag of the entity that 49 | would have been returned in the response to a similar GET request 50 | (without the If-None-Match header) on that resource, [...] then 51 | the server MUST NOT perform the requested method, [...]. Instead, if 52 | the request method was GET or HEAD, the server SHOULD respond with 53 | a 304 (Not Modified) response, including the cache-related header 54 | fields (particularly ETag) of one of the entities that matched. 55 | 56 | (Paraphrased) A server may provide an ETag header on a response. On 57 | subsequent queries, the client may reference the value of this Etag 58 | header in an If-None-Match header; on receiving such a header, the 59 | server can check whether the entity at that URL has changed from the 60 | clients last version, and if not, it can return a 304 to indicate 61 | the client can use it's current representation. 62 | """ 63 | r = sess.get(self.etag_url) 64 | 65 | # make sure we cached it 66 | assert self.cache.get(self.etag_url) == r.raw 67 | 68 | # make the same request 69 | resp = sess.get(self.etag_url) 70 | assert resp.raw == r.raw 71 | assert resp.from_cache 72 | 73 | # tell the server to change the etags of the response 74 | sess.get(self.update_etag_url) 75 | 76 | resp = sess.get(self.etag_url) 77 | assert resp != r 78 | assert not resp.from_cache 79 | 80 | # Make sure we updated our cache with the new etag'd response. 81 | assert self.cache.get(self.etag_url) == resp.raw 82 | 83 | 84 | class TestDisabledETags(object): 85 | """Test our use of ETags when the response is stale and the 86 | response has an ETag. 
87 | """ 88 | @pytest.fixture() 89 | def sess(self, server): 90 | self.etag_url = urljoin(server.application_url, '/etag') 91 | self.update_etag_url = urljoin(server.application_url, '/update_etag') 92 | self.cache = DictCache() 93 | sess = CacheControl(requests.Session(), 94 | cache=self.cache, 95 | cache_etags=False, 96 | serializer=NullSerializer()) 97 | return sess 98 | 99 | def test_expired_etags_if_none_match_response(self, sess): 100 | """Make sure an expired response that contains an ETag uses 101 | the If-None-Match header. 102 | """ 103 | # get our response 104 | r = sess.get(self.etag_url) 105 | 106 | # expire our request by changing the date. Our test endpoint 107 | # doesn't provide time base caching headers, so we add them 108 | # here in order to expire the request. 109 | r.headers['Date'] = 'Tue, 26 Nov 2012 00:50:49 GMT' 110 | self.cache.set(self.etag_url, r) 111 | 112 | r = sess.get(self.etag_url) 113 | assert r.from_cache 114 | assert 'if-none-match' in r.request.headers 115 | assert r.status_code == 200 116 | -------------------------------------------------------------------------------- /docs/etags.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | ETag Support 3 | ============== 4 | 5 | CacheControl's support of ETags is slightly different than 6 | httplib2. In httplib2, an ETag is considered when using a cached 7 | response when the cache is considered stale. When a cached response is 8 | expired and it has an ETag header, it returns a response with the 9 | appropriate `If-None-Match` header. We'll call this behavior a **Time 10 | Priority** cache as the ETag support only takes effect when the time has 11 | expired. 12 | 13 | In CacheControl the default behavior when an ETag an sent by the 14 | server is to cache the response. We'll refer to this pattern as a 15 | **Equal Priority** cache as the decision to cache is either time base or 16 | due to the presense of an ETag. 
17 | 18 | The spec is not explicit what takes priority when caching with both 19 | ETags and time based headers. Therefore, CacheControl supports the 20 | different mechanisms via configuration where possible. 21 | 22 | 23 | Turning Off Equal Priority Caching 24 | ================================== 25 | 26 | The danger in Equal Priority Caching is that a server that returns 27 | ETag headers for every request may fill up your cache. You can disable 28 | Equal Priority Caching and utilize a Time Priority algorithm like 29 | httplib2. :: 30 | 31 | import requests 32 | from cachecontrol import CacheControl 33 | 34 | sess = CacheControl(requests.Session(), cache_etags=False) 35 | 36 | This will only utilize ETags when they exist within the context of 37 | time based caching headers. If a response has time base caching 38 | headers that are valid along with an ETag, we will still attempt to 39 | handle a 304 Not Modified even though the cached value as 40 | expired. Here is a simple example. :: 41 | 42 | # Server response 43 | GET /foo.html 44 | Date: Tue, 26 Nov 2013 00:50:49 GMT 45 | Cache-Control: max-age=3000 46 | ETag: JAsUYM8K 47 | 48 | On a subsequent request, if the cache has expired, the next request 49 | will still include the `If-None-Match` header. The cached response 50 | will remain in the cache awaiting the response. :: 51 | 52 | # Client request 53 | GET /foo.html 54 | If-None-Match: JAsUYM8K 55 | 56 | If the server returns a `304 Not Modified`, it will use the stale 57 | cached value, updating the headers from the most recent request. :: 58 | 59 | # Server response 60 | GET /foo.html 61 | Date: Tue, 26 Nov 2013 01:30:19 GMT 62 | Cache-Control: max-age=3000 63 | ETag: JAsUYM8K 64 | 65 | If the server returns a `200 OK`, the cache will be updated 66 | accordingly. 
67 | 68 | 69 | Equal Priority Caching Benefits 70 | =============================== 71 | 72 | The benefits of equal priority caching is that you have two orthogonal 73 | means of introducing a cache. The time based cache provides an 74 | effective way to reduce the load on requests that can be eventually 75 | consistent. Static resource are a great example of when time based 76 | caching is effective. 77 | 78 | The ETag based cache is effective for working with documents that are 79 | larger and/or need to be correct immediately after changes. For 80 | example, if you exported some data from a large database, the file 81 | could be 10 GBs. Being able to send an ETag with this sort of request 82 | an know the version you have locally is valid saves a ton of bandwidth 83 | and time. 84 | 85 | Likewise, if you have a resource that you want to update, you can be 86 | confident there will not be a `lost update`_ because you have local 87 | version that is stale. 88 | 89 | 90 | Endpoint Specific Caching 91 | ========================= 92 | 93 | It should be pointed out that there are times when an endpoint is 94 | specifically tailored for different caching techniques. If you have a 95 | RESTful service, there might be endpoints that are specifically meant 96 | to be cached via time based caching techniques where as other 97 | endpoints should focus on using ETags. In this situation it is 98 | recommended that you use the `CacheControlAdapter` directly. :: 99 | 100 | import requests 101 | from cachecontrol import CacheControlAdapter 102 | from cachecontrol.caches import RedisCache 103 | 104 | # using django for an idea on where you might get a 105 | # username/password. 106 | from django.conf import settings 107 | 108 | # a function to return a redis connection all the instances of the 109 | # app may use. this allows updates to the API (ie PUT) to invalidate 110 | # the cache for other users. 
111 | from myapp.db import redis_connection 112 | 113 | 114 | # create our session 115 | client = sess.Session(auth=(settings.user, settings.password)) 116 | 117 | # we have a gettext like endpoint. this doesn't get updated very 118 | # often so a time based cache is a helpful way to reduce many small 119 | # requests. 120 | client.mount('http://myapi.foo.com/gettext/', 121 | CacheControlAdapter(cache_etags=False)) 122 | 123 | 124 | # here we have user profile endpoint that lets us update information 125 | # about users. we need this to be consistent immediately after a user 126 | # updates some information because another node might handle the 127 | # request. It uses the global redis cache to coordinate the cache and 128 | # uses the equal priority caching to be sure etags are used by default. 129 | redis_cache = RedisCache(redis_connection()) 130 | client.mount('http://myapi.foo.com/user_profiles/', 131 | CacheControlAdapter(cache=redis_cache)) 132 | 133 | Hopefully this more indepth example reveals how to configure a 134 | `requests.Session` to better utilize ETag based caching vs. Time 135 | Priority Caching. 136 | 137 | .. _lost update: http://www.w3.org/1999/04/Editing/ 138 | -------------------------------------------------------------------------------- /tests/test_cache_control.py: -------------------------------------------------------------------------------- 1 | """ 2 | Unit tests that verify our caching methods work correctly. 
3 | """ 4 | import pytest 5 | from mock import ANY, Mock 6 | import time 7 | 8 | from cachecontrol import CacheController 9 | from cachecontrol.cache import DictCache 10 | 11 | 12 | TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" 13 | 14 | 15 | class NullSerializer(object): 16 | 17 | def dumps(self, request, response): 18 | return response 19 | 20 | def loads(self, request, data): 21 | return data 22 | 23 | 24 | class TestCacheControllerResponse(object): 25 | url = 'http://url.com/' 26 | 27 | def req(self, headers=None): 28 | headers = headers or {} 29 | return Mock(full_url=self.url, # < 1.x support 30 | url=self.url, 31 | headers=headers) 32 | 33 | def resp(self, headers=None): 34 | headers = headers or {} 35 | return Mock(status=200, 36 | headers=headers, 37 | request=self.req(), 38 | read=lambda **k: b"testing") 39 | 40 | @pytest.fixture() 41 | def cc(self): 42 | # Cache controller fixture 43 | return CacheController(Mock(), serializer=Mock()) 44 | 45 | def test_no_cache_non_20x_response(self, cc): 46 | # No caching without some extra headers, so we add them 47 | now = time.strftime(TIME_FMT, time.gmtime()) 48 | resp = self.resp({'cache-control': 'max-age=3600', 49 | 'date': now}) 50 | 51 | no_cache_codes = [201, 300, 400, 500] 52 | for code in no_cache_codes: 53 | resp.status = code 54 | cc.cache_response(Mock(), resp) 55 | assert not cc.cache.set.called 56 | 57 | # this should work b/c the resp is 20x 58 | resp.status = 203 59 | cc.cache_response(self.req(), resp) 60 | assert cc.serializer.dumps.called 61 | assert cc.cache.set.called 62 | 63 | def test_no_cache_with_no_date(self, cc): 64 | # No date header which makes our max-age pointless 65 | resp = self.resp({'cache-control': 'max-age=3600'}) 66 | cc.cache_response(self.req(), resp) 67 | 68 | assert not cc.cache.set.called 69 | 70 | def test_cache_response_no_cache_control(self, cc): 71 | resp = self.resp() 72 | cc.cache_response(self.req(), resp) 73 | 74 | assert not cc.cache.set.called 75 | 76 | def 
test_cache_response_cache_max_age(self, cc): 77 | now = time.strftime(TIME_FMT, time.gmtime()) 78 | resp = self.resp({'cache-control': 'max-age=3600', 79 | 'date': now}) 80 | req = self.req() 81 | cc.cache_response(req, resp) 82 | cc.serializer.dumps.assert_called_with(req, resp, body=None) 83 | cc.cache.set.assert_called_with(self.url, ANY) 84 | 85 | def test_cache_repsonse_no_store(self): 86 | resp = Mock() 87 | cache = DictCache({self.url: resp}) 88 | cc = CacheController(cache) 89 | 90 | cache_url = cc.cache_url(self.url) 91 | 92 | resp = self.resp({'cache-control': 'no-store'}) 93 | assert cc.cache.get(cache_url) 94 | 95 | cc.cache_response(self.req(), resp) 96 | assert not cc.cache.get(cache_url) 97 | 98 | 99 | class TestCacheControlRequest(object): 100 | url = 'http://foo.com/bar' 101 | 102 | def setup(self): 103 | self.c = CacheController( 104 | DictCache(), 105 | serializer=NullSerializer(), 106 | ) 107 | 108 | def req(self, headers): 109 | return self.c.cached_request(Mock(url=self.url, headers=headers)) 110 | 111 | def test_cache_request_no_cache(self): 112 | resp = self.req({'cache-control': 'no-cache'}) 113 | assert not resp 114 | 115 | def test_cache_request_pragma_no_cache(self): 116 | resp = self.req({'pragma': 'no-cache'}) 117 | assert not resp 118 | 119 | def test_cache_request_no_store(self): 120 | resp = self.req({'cache-control': 'no-store'}) 121 | assert not resp 122 | 123 | def test_cache_request_max_age_0(self): 124 | resp = self.req({'cache-control': 'max-age=0'}) 125 | assert not resp 126 | 127 | def test_cache_request_not_in_cache(self): 128 | resp = self.req({}) 129 | assert not resp 130 | 131 | def test_cache_request_fresh_max_age(self): 132 | now = time.strftime(TIME_FMT, time.gmtime()) 133 | resp = Mock(headers={'cache-control': 'max-age=3600', 134 | 'date': now}) 135 | 136 | cache = DictCache({self.url: resp}) 137 | self.c.cache = cache 138 | r = self.req({}) 139 | assert r == resp 140 | 141 | def 
test_cache_request_unfresh_max_age(self): 142 | earlier = time.time() - 3700 # epoch - 1h01m40s 143 | now = time.strftime(TIME_FMT, time.gmtime(earlier)) 144 | resp = Mock(headers={'cache-control': 'max-age=3600', 145 | 'date': now}) 146 | self.c.cache = DictCache({self.url: resp}) 147 | r = self.req({}) 148 | assert not r 149 | 150 | def test_cache_request_fresh_expires(self): 151 | later = time.time() + 86400 # GMT + 1 day 152 | expires = time.strftime(TIME_FMT, time.gmtime(later)) 153 | now = time.strftime(TIME_FMT, time.gmtime()) 154 | resp = Mock(headers={'expires': expires, 155 | 'date': now}) 156 | cache = DictCache({self.url: resp}) 157 | self.c.cache = cache 158 | r = self.req({}) 159 | assert r == resp 160 | 161 | def test_cache_request_unfresh_expires(self): 162 | sooner = time.time() - 86400 # GMT - 1 day 163 | expires = time.strftime(TIME_FMT, time.gmtime(sooner)) 164 | now = time.strftime(TIME_FMT, time.gmtime()) 165 | resp = Mock(headers={'expires': expires, 166 | 'date': now}) 167 | cache = DictCache({self.url: resp}) 168 | self.c.cache = cache 169 | r = self.req({}) 170 | assert not r 171 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/CacheControl.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/CacheControl.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/CacheControl" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/CacheControl" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 
108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # CacheControl documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Nov 4 15:01:23 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.viewcode'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 
43 | project = u'CacheControl' 44 | copyright = u'2013, Eric Larson' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '0.6' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '0.6' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 
94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 
144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'CacheControldoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'CacheControl.tex', u'CacheControl Documentation', 187 | u'Eric Larson', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 
199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'cachecontrol', u'CacheControl Documentation', 217 | [u'Eric Larson'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'CacheControl', u'CacheControl Documentation', 231 | u'Eric Larson', 'CacheControl', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /cachecontrol/controller.py: -------------------------------------------------------------------------------- 1 | """ 2 | The httplib2 algorithms ported for use with requests. 
"""The httplib2 algorithms ported for use with requests."""
import re
import calendar
import time

from requests.structures import CaseInsensitiveDict

from .cache import DictCache
from .compat import parsedate_tz
from .serialize import Serializer


# URI splitter regex from Appendix B of RFC 3986.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    groups = URI.match(uri).groups()
    return (groups[1], groups[3], groups[4], groups[6], groups[8])


class CacheController(object):
    """An interface to see if request should cached or not.
    """
    def __init__(self, cache=None, cache_etags=True, serializer=None):
        # cache: any object providing get/set/delete (defaults to an
        # in-memory dict cache).
        self.cache = cache or DictCache()
        # cache_etags: when True, responses carrying an ETag are cached
        # even without explicit freshness information.
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()

    def _urlnorm(self, uri):
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            # ValueError subclasses Exception, so existing callers that
            # caught the old bare Exception still work.
            raise ValueError("Only absolute URIs are allowed. uri = %s" % uri)
        authority = authority.lower()
        scheme = scheme.lower()
        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    def cache_url(self, uri):
        """Return the normalized cache key for the given absolute URI."""
        return self._urlnorm(uri)

    @staticmethod
    def _int_directive(value):
        """Parse a numeric cache directive value.

        Returns the value as an int, or None when the value is missing
        or not an integer (malformed headers must not crash caching).
        """
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.

        Directive names are lowercased; valued directives (``name=value``)
        map to their (string) value, valueless ones map to 1.
        """
        retval = {}

        cc_header = 'Cache-Control' if 'Cache-Control' in headers else 'cache-control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")]
            parts_wo_args = [(name.strip().lower(), 1)
                             for name in parts if -1 == name.find("=")]
            retval = dict(parts_with_args + parts_wo_args)
        return retval

    def cached_request(self, request):
        """Return a fresh cached response for *request*, or False.

        False means the caller should perform the real request (possibly
        a conditional one via conditional_headers()).
        """
        cache_url = self.cache_url(request.url)
        cc = self.parse_cache_control(request.headers)

        # non-caching states
        no_cache = 'no-cache' in cc
        # NOTE: directive values are strings, so compare numerically.
        # (The old ``cc['max-age'] == 0`` never matched the string '0'.)
        if self._int_directive(cc.get('max-age')) == 0:
            no_cache = True

        # Bail out if no-cache was set
        if no_cache:
            return False

        # It is in the cache, so lets see if it is going to be
        # fresh enough
        resp = self.serializer.loads(request, self.cache.get(cache_url))

        # Check to see if we have a cached object
        if not resp:
            return False

        headers = CaseInsensitiveDict(resp.headers)

        # Without a parseable Date header (e.g. a response cached purely
        # for its ETag) we cannot compute an age; fall back to a real
        # (conditional) request instead of raising.
        if 'date' not in headers:
            return False
        parsed_date = parsedate_tz(headers['date'])
        if parsed_date is None:
            return False

        now = time.time()
        date = calendar.timegm(parsed_date)
        current_age = max(0, now - date)

        # TODO: There is an assumption that the result will be a
        # urllib3 response object. This may not be best since we
        # could probably avoid instantiating or constructing the
        # response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0
        resp_max_age = self._int_directive(resp_cc.get('max-age'))
        if resp_max_age is not None and resp_max_age >= 0:
            freshness_lifetime = resp_max_age
        elif 'expires' in headers:
            expires = parsedate_tz(headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)

        # determine if we are setting freshness limit in the req;
        # a request max-age overrides whatever the response allowed
        if 'max-age' in cc:
            freshness_lifetime = self._int_directive(cc['max-age']) or 0

        if 'min-fresh' in cc:
            min_fresh = self._int_directive(cc['min-fresh']) or 0
            # adjust our current age by our min fresh
            current_age += min_fresh

        # see how fresh we actually are
        fresh = (freshness_lifetime > current_age)

        if fresh:
            return resp

        # we're not fresh. If we don't have an Etag, clear it out;
        # with an ETag the entry can still be revalidated via a 304.
        if 'etag' not in headers:
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request):
        """Build If-None-Match / If-Modified-Since headers from the cached
        response for *request*, if any."""
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers

    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        if response.status not in [200, 203]:
            return

        response_headers = CaseInsensitiveDict(response.headers)

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)

        # Delete it from the cache if we happen to have it stored there
        no_store = cc.get('no-store') or cc_req.get('no-store')
        if no_store and self.cache.get(cache_url):
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in response_headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                # A malformed max-age previously raised ValueError here;
                # treat it as "do not cache" instead.
                max_age = self._int_directive(cc['max-age'])
                if max_age is not None and max_age > 0:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(request, self.cache.get(cache_url))

        if not cached_response:
            # we didn't have a cached response
            return response

        # did so lets update our headers
        cached_response.headers.update(response.headers)

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response