├── 2012
│   └── 12
│       ├── 4
│       │   └── hello-internet.rst
│       └── 5
│           └── python-spell-checker.rst
├── 2013
│   ├── 1
│   │   └── 5
│   │       └── decorates-and-annotations.rst
│   └── 2
│       ├── 4
│       │   └── joel-test-for-data-teams.rst
│       └── 24
│           └── timing-python-code.rst
├── .DS_Store
├── config.yml
├── Makefile
├── README.md
├── _build
│   ├── 2012
│   │   ├── 12
│   │   │   ├── 4
│   │   │   │   └── hello-internet
│   │   │   │       └── index.html
│   │   │   ├── 5
│   │   │   │   └── python-spell-checker
│   │   │   │       └── index.html
│   │   │   └── index.html
│   │   └── index.html
│   ├── 2013
│   │   ├── 1
│   │   │   └── 5
│   │   │       └── decorates-and-annotations
│   │   │           └── index.html
│   │   ├── 2
│   │   │   ├── 4
│   │   │   │   └── joel-test-for-data-teams
│   │   │   │       └── index.html
│   │   │   └── 24
│   │   │       └── timing-python-code
│   │   │           └── index.html
│   │   ├── index.html
│   │   ├── 02
│   │   │   └── index.html
│   │   └── 01
│   │       └── index.html
│   ├── README.md
│   ├── upload.sh
│   ├── tags
│   │   ├── Thoughts
│   │   │   ├── index.html
│   │   │   └── feed.atom
│   │   ├── performance
│   │   │   ├── index.html
│   │   │   └── feed.atom
│   │   ├── bayes
│   │   │   ├── index.html
│   │   │   └── feed.atom
│   │   ├── introduction
│   │   │   ├── index.html
│   │   │   └── feed.atom
│   │   ├── statistics
│   │   │   ├── index.html
│   │   │   └── feed.atom
│   │   ├── probability
│   │   │   ├── index.html
│   │   │   └── feed.atom
│   │   ├── python
│   │   │   └── index.html
│   │   └── index.html
│   ├── archive
│   │   └── index.html
│   ├── static
│   │   ├── _pygments.css
│   │   └── style.css
│   ├── about
│   │   └── index.html
│   └── index.html
├── _templates
│   ├── tagcloud.html
│   ├── tag.html
│   ├── _pagination.html
│   ├── blog
│   │   ├── year_archive.html
│   │   ├── month_archive.html
│   │   ├── archive.html
│   │   └── index.html
│   ├── rst_display.html
│   └── layout.html
├── upload.sh
├── .gitignore
├── about.rst
└── static
    └── style.css

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mattalcock/blog/HEAD/.DS_Store

--------------------------------------------------------------------------------
/config.yml:
--------------------------------------------------------------------------------
1 | active_modules: [pygments, tags, blog, latex]
2 | author: Matt Alcock
3 | canonical_url: 
http://blog.mattalcock.com/
4 | modules:
5 |   pygments:
6 |     style: tango

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: build upload
2 | 
3 | clean:
4 | 	rm -rf _build
5 | 
6 | build:
7 | 	run-rstblog build
8 | 
9 | serve:
10 | 	run-rstblog serve
11 | 
12 | upload:
13 | 	./upload.sh
14 | 	@echo "Done..."

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | blog
2 | ====
3 | 
4 | My personal blog
5 | 
6 | 
7 | Building blog command....
8 | 
9 | >run-rstblog build
10 | 
11 | Running blog command....
12 | 
13 | >run-rstblog serve
14 | 
15 | Copy blog command....
16 | 
17 | >upload.sh (work in progress)

--------------------------------------------------------------------------------
/_build/README.md:
--------------------------------------------------------------------------------
1 | blog
2 | ====
3 | 
4 | My personal blog
5 | 
6 | 
7 | Building blog command....
8 | 
9 | >run-rstblog build
10 | 
11 | Running blog command....
12 | 
13 | >run-rstblog serve
14 | 
15 | Copy blog command....
16 | 
17 | >upload.sh (work in progress)

--------------------------------------------------------------------------------
/_templates/tagcloud.html:
--------------------------------------------------------------------------------
1 | {% extends "layout.html" %}
2 | {% block title %}Tags{% endblock %}
3 | {% block body %}
4 | 
written on {{ format_date(ctx.pub_date, format='full') }}
9 | {% endif %}
10 | 
11 | {{ rst.fragment }}
12 | 
13 | {% if ctx.tags %}
14 | 
written on Monday, February 4, 2013 31 | 32 | 33 |
The original 'Joel Test'.
34 |Do you use source control? 35 | Can you make a build in one step? 36 | Do you make daily builds? 37 | Do you have a bug database? 38 | Do you fix bugs before writing new code? 39 | Do you have an up-to-date schedule? 40 | Do you have a spec? 41 | Do programmers have quiet working conditions? 42 | Do you use the best tools money can buy? 43 | Do you have testers? 44 | Do new candidates write code during their interview? 45 | Do you do hallway usability testing?
46 |The 'Data Team Test'.
47 |Do you use source control? 48 | Do you have separate development and production environments? 49 | Do you have access to raw data in a warehouse or other offline store that doesn't impact the live system? 50 | Do you have a way of recording and marking dirty data? 51 | Do you record bugs and fix them before writing new code? 52 | Do you have a process to schedule and prioritise analysis? 53 | Do you spend time with product owners and domain experts? 54 | Do programmers/analysts have quiet working conditions? 55 | Do you use the best tools money can buy? 56 | Do you have testers? 57 | Do you have a mechanism to communicate findings and make recommendations? 58 | Do you and other teams employ data-driven development techniques?
59 | 60 | 61 | 62 | 63 | 64 | 65 |My name is Matt Alcock and I'm a Data Scientist, Analytics Lead and Python fan. I'm currently the Lead Analyst at NaturalMotion Games, where I manage a small team of data analysts. I currently work and live in Oxford. I've worked in small startups and large multinational companies covering a variety of industries including games, finance and fashion. I've been working with data for 10+ years and although my jobs have been varied they've all centered around drawing insight from large data sets.
32 |I split the majority of my time between Oxford and London. If you'd like to meet for a coffee or discuss anything please drop me a message through one of the following channels:
33 |The website is a collection of observations, thoughts, notes and side projects. A lot of the supporting code for the blog posts can be found in a public repo under my GitHub account.
41 |The website itself is written in reStructuredText and built with a small 42 | script written by the very talented Armin Ronacher. Source code can be found on GitHub.
43 |written on Tuesday, December 4, 2012 31 | 32 | 33 |
The obligatory first post. I'll be honest: I've been meaning to start and commit to a blog for some time. After some false starts covering my broad spectrum of interests, I've decided to focus on writing about my thoughts as a Data Scientist. I'm hoping people will find this informative and insightful. If any thoughts, feedback or collaborations come from this then the blog will have been a success. So please let me know what you think.
34 |I've been working with data for 10 years and my job title has jumped around from Developer, Data Analyst, Quantitative Analyst, Team Leader, Product Manager, Warehouse Manager, Head of Analytics, Lead Analyst and Data Scientist. So what am I? I'm not sure everyone fits into role buckets, but one thing I am convinced of is that everyone's interests and expertise are different. I enjoy managing small technical teams, I love working with large amounts of data and I have the expertise to apply statistical and scientific methods to my work.
35 |Below are some biases and opinions I should mention before I start. I'll explain these in more detail over the coming posts but they're four personal and somewhat subjective opinions I wanted to share from the outset. 36 | 37 | - I love the power of modern NoSQL data stores but still feel SQL is an amazing tool for analysis that's hard to beat. 38 | - I think data can unlock questions and give insights into almost every area of business, but I also understand it's not a silver bullet. It should be used with creativity, lateral thinking and domain expertise, not instead of them. 39 | - I love the concise power of statistics but realise they're frequently misleading and often poorly presented and explained. 40 | - I'm programming-language agnostic but love to use Python
41 |The blog will contain thoughts, tools, projects and some book reviews. If there is anything you'd like me to talk about or review, please drop me an email.
42 |I hope you like what's to come...
43 | 44 | 45 | 46 |Using decorators to time and optimise the performance of python code.
32 |An introduction into decorators and annotations in python and their simple power.
49 |How to use Python and some powerful statistics to create a very lightweight but effective Google style spell checker.
66 |The obligatory first post.
85 |written on Sunday, February 24, 2013 31 | 32 | 33 |
This post outlines why timing code in Python is important and provides some simple decorators that can help you time your code without the concerns and worries of peppering your lovely clean code with temporary timing and print statements.
34 |Scroll down if your just after the decortor code to time functions....
35 |One of things Python was orignally critisied for was speed. Like lots of Dynamic Lanaguages there is an overhead in keeping tracking of types and because code is interpreted at runtime instead of being compiled to native code at compile time dynmaic langauges like Python will always be a little slower.
38 |Where Python shines is in it's power and ability to allow progrmaers to opmtimise and focus on the algorthim. Focusing on the complextity of the problem and the algorithms order of magnitidue rather than the low level detials of memory management, pointers etc can often have massive benefits. Ask any computer science student and they can list of nermerous teachings that show alogrithm and data strucute design will beat brute force compuatation power.
39 |If your looking to build something where microsecounds count then I'd turn to C or Java. PyPy and other sophiticated JIT (Just in Time) compliers can help and they seam to be the future for Pytohn solutions in this space. Another aterntative is to find the slow code and either optimise that function or write a C plugin for Python for your very specific task. This last approach seams very popular in the finaince industry where milliseconds mean dollars but they still need the felxiablity and speed of devlelopment benefits that come with a dynamic lanaguage.
40 |More often than not slow code just needs some refactoring work, a new support data strucutre or a change in the complexity of processing. So the challenge is really not how can I speed up my code but what code needs my attention.
41 |In order to find slow Python code we're going to have to time stuff. We don't want to cover our lovely clean code with temporary timing code and print statements, so how can we:
45 |46 |52 |47 |
51 |- Time code without alteringing the code of a function
48 |- Get detailed timing information if the function is run with different arguments
49 |- Switch off the timing at deploy time to reduce the overhead and improve the performance of monitoring
50 |
The timing decorators below can help with all of these. If you're new to decorators and annotations, see my previous blog post on the subject. 53 |
53 |import time
57 |
58 | def timeit(f):
59 |
60 | def timed(*args, **kw):
61 | ts = time.time()
62 | result = f(*args, **kw)
63 | te = time.time()
64 |
65 | print 'func:%r args:[%r, %r] took: %2.4f sec' % \
66 | (f.__name__, args, kw, te-ts)
67 | return result
68 |
69 | return timed
70 | Using the decorator is easy: either use annotations.
72 |@timeit
73 | def compute_magic(n):
74 | #function definition
75 | ....
76 | Or re-alias the function you want to time.
78 |compute_magic = timeit(compute_magic)
79 | Sometimes you'll want to remove the timing code. You can either do this by removing the timeit annotations before deployment, or you can use a configuration switch to control whether the decorator wraps the function in timing code.
81 |import time
82 |
83 | #from config import TIME_FUNCTIONS
84 | TIME_FUNCTIONS = False
85 |
86 | def timeit(f):
87 | if not TIME_FUNCTIONS:
88 | return f
89 | else:
90 | def timed(*args, **kw):
91 | ts = time.time()
92 | result = f(*args, **kw)
93 | te = time.time()
94 |
95 | print 'func:%r args:[%r, %r] took: %2.4f sec' % \
96 | (f.__name__, args, kw, te-ts)
97 | return result
98 |
99 | return timed
100 | By simply changing the TIME_FUNCTIONS configuration switch, functions will not be decorated. I find having these switches in a common config file/folder often helps.
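A refinement worth mentioning alongside the switch, though it isn't in the original snippet: wrapping with functools.wraps preserves the decorated function's name and docstring, which matters because the timing line prints f.__name__. A minimal sketch (compute_magic is just an illustrative stand-in):

```python
import time
from functools import wraps

TIME_FUNCTIONS = True  # in practice this would come from your config module


def timeit(f):
    if not TIME_FUNCTIONS:
        return f  # switched off: return the function untouched

    @wraps(f)  # keep f.__name__ and f.__doc__ on the wrapper
    def timed(*args, **kw):
        ts = time.time()
        result = f(*args, **kw)
        te = time.time()
        print('func:%r args:[%r, %r] took: %2.4f sec' % (f.__name__, args, kw, te - ts))
        return result
    return timed


@timeit
def compute_magic(n):
    return sum(range(n))
```

With wraps in place, compute_magic.__name__ still reports 'compute_magic' rather than 'timed', which keeps logs and tracebacks readable.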
102 |All this code and the majority of code from my posts can be found in the hack repo of my GitHub account. Please take a look here. I hope it's helped; if there are any questions about the above, or you'd like to understand more about timing code in Python, drop me a mail.
103 |Matt
104 |written on Wednesday, December 5, 2012 31 | 32 | 33 |
Have you ever been really impressed with Google's 'Did you mean....' spell checker? 34 | Have you ever just typed something into Google to help you with your spelling?
35 |My answer to the questions above would be: Yes, all the time!
36 |In a fantastic post I read some years ago Peter Norvig outlined how Google’s ‘did you mean’ spelling corrector uses probability theory, large training sets and some elegant statistical language processing to be so effective. Type in a search like 'speling' and Google comes back in 0.1 seconds or so with Did you mean: 'spelling'. Below is a toy spelling corrector in Python that achieves 80 to 90% accuracy and is very fast. It's written in a fantastically impressive 21 lines of code. It uses list comprehensions, and some of my favorite data structures (sets and default dictionaries).
37 |The code and supporting data files can be found in my hacks public repo under the spellcheck folder.
38 |The data seed comes from a big.txt file that consists of about a million words. The file is a concatenation of several public domain books from Project Gutenberg and lists of the most frequent words from Wiktionary and the British National Corpus. It uses a simple training method of just counting the occurrences of each word in the big text file. Obviously Google has a lot more data to seed its spelling checker with, but I was surprised at how effective this relatively small seed was.
39 |import re, collections
40 |
41 | def words(text):
42 | return re.findall('[a-z]+', text.lower())
43 |
44 | def train(features):
45 | model = collections.defaultdict(lambda: 1)
46 | for f in features:
47 | model[f] += 1
48 | return model
49 |
50 | NWORDS = train(words(file('big.txt').read()))
51 | alphabet = 'abcdefghijklmnopqrstuvwxyz'
52 |
53 | def edits1(word):
54 | s = [(word[:i], word[i:]) for i in range(len(word) + 1)]
55 | deletes = [a + b[1:] for a, b in s if b]
56 | transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b)>1]
57 | replaces = [a + c + b[1:] for a, b in s for c in alphabet if b]
58 | inserts = [a + c + b for a, b in s for c in alphabet]
59 | return set(deletes + transposes + replaces + inserts)
60 |
61 | def known_edits2(word):
62 | return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS)
63 |
64 | def known(words):
65 | return set(w for w in words if w in NWORDS)
66 |
67 | def correct(word):
68 | candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word]
69 | return max(candidates, key=NWORDS.get)
70 | If you're new to Python some of the above code may look complicated and hard to follow. Although dense, I love Peter's use of list comprehensions and generators. The use of nested function composition is also very efficient and I've noticed a massive speed-up in using such approaches when ingesting or processing large data files.
72 |An example of nested function composition is:
73 |NWORDS = train(words(file('big.txt').read()))
74 | An example of complex list comprehension is:
76 |[a + c + b[1:] for a, b in s for c in alphabet if b]
77 | The final thing I really like in this code snippet is the overriding of the key function when max is called in the 'correct' function. This is a great way to find the word with the highest value in a dictionary of word->count mappings.
79 |return max(candidates, key=NWORDS.get)
80 | The code is simple and elegant and basically generates a set of candidate words based on the partial or badly spelt word (aka the original word). The most often used word from the candidates is chosen. Peter explains how Bayes' Theorem is used to select the best correction given the original word.
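To see the pipeline work end to end without downloading big.txt, the same functions can be run against a tiny inline corpus. This is a sketch: the corpus below is purely illustrative and stands in for the real training data, so don't expect the 80 to 90% accuracy from it.

```python
import re
import collections


def words(text):
    return re.findall('[a-z]+', text.lower())


def train(features):
    # Count word occurrences; unseen words default to 1 (a crude smoothing)
    model = collections.defaultdict(lambda: 1)
    for f in features:
        model[f] += 1
    return model


# Tiny inline corpus standing in for big.txt (illustrative only)
NWORDS = train(words('spelling is hard but spelling practice helps spelling'))
alphabet = 'abcdefghijklmnopqrstuvwxyz'


def edits1(word):
    # All words one edit away: deletes, transposes, replaces, inserts
    s = [(word[:i], word[i:]) for i in range(len(word) + 1)]
    deletes = [a + b[1:] for a, b in s if b]
    transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b) > 1]
    replaces = [a + c + b[1:] for a, b in s for c in alphabet if b]
    inserts = [a + c + b for a, b in s for c in alphabet]
    return set(deletes + transposes + replaces + inserts)


def known(words):
    return set(w for w in words if w in NWORDS)


def correct(word):
    candidates = known([word]) or known(edits1(word)) or [word]
    return max(candidates, key=NWORDS.get)
```

Here correct('speling') finds 'spelling' because the insert edit generates it and it is the most frequent known candidate.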
82 |See more details, test results and further work at Peter Norvig’s site.
83 | 84 | 85 | 86 |written on Saturday, January 5, 2013 31 | 32 | 33 |
I wanted to highlight the power of decorators and annotations in Python and give the novice Python programmer some insight into how they can be used. If you've only been using Python for a short while then both of these will probably be new.
34 |Decorators are a way of implementing the famous computer science decorator pattern. This pattern, put in simple terms, is a mechanism that allows you to inject or modify code in a function. In Python you can have two different styles of decorator: the function defined style or the class defined style. I prefer the function style but I'll show you the class structure as well.
35 |The best way to explain their use is through a well known example. The below code shows how to functionally compute the Fibonacci numbers.
36 |The Fibonacci sequence is: [0,1,1,2,3,5,8,13.....] where the nth number equals the sum of the (n-1)th and (n-2)th.
37 |An elegant way of computing this is using the below code:
38 |def fib(n):
39 | if n<=0:
40 | return 0
41 | elif n==1:
42 | return 1
43 | else:
44 | return fib(n-2) + fib(n-1)
45 | So fib(7) would return 13. As you can see from the code this uses recursion. The challenge with this approach for calculating the fib sequence is that the low 'tail' function calls will get called multiple times. Removing this overhead is called 'tail recursion elimination' or TRE. Python doesn't support this and probably won't. Below shows how running the fib function for just a small n can result in a massive number of calls of the tail values.
47 |fib(7) = fib(6) + fib(5)
48 | fib(7) = fib(5) + fib(4) + fib(4) + fib(3)
49 | fib(7) = fib(4) + fib(3) + fib(3) + fib(2) + fib(3) + fib(2) + fib(2) + fib(1)
50 | .....
51 | fib(7) = fib(1) + fib(0) + fib(1) + fib(0) + .......... [All fib zeros and fib ones]
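The blow-up sketched above is easy to measure by adding a call counter to the plain fib (a quick sketch, not from the original post):

```python
calls = 0


def fib(n):
    global calls
    calls += 1  # count every invocation, including all the repeated tail calls
    if n <= 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fib(n - 2) + fib(n - 1)


result = fib(7)
print(result, calls)  # fib(7) == 13 already takes 41 calls
```

The call count grows roughly as fast as the Fibonacci numbers themselves: fib(30) this way takes about 2.7 million calls, which is exactly the overhead memoization removes.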
52 | A way to make this faster is to use a technique called memoization. This remembers the result of a function for a given argument, stores it, and if called again uses the stored version rather than recalculating. This can speed up the above by many orders of magnitude.
54 |The best way to implement this function calling memory is by decorating the function with some code that can modify the execution path to check a pre saved store first. Below is the memoize decorator as a function.
55 |def memoize(f):
56 | cache= {}
57 | def memf(*args, **kw):
58 | key = (args, tuple(sorted(kw.items())))
59 | if key not in cache:
60 | cache[key] = f(*args, **kw)
61 | return cache[key]
62 | return memf
63 | The memoize decorator above takes a function as an argument. It then creates a new function that stores the results of the function into a cache. The decorator then returns the new function that contains the original function call. 65 | We can then use some clever dynamic language tricks to re-alias the fib function to the decorated version.
66 |fib = memoize(fib)
67 | Calling fib after this aliased decoration ensures that the decorated function will run instead of the basic fib function. 69 | 70 | I hope that explains how decorators work in Python and gives you an example of use. So what are annotations?
71 |Annotations allow us to use decorators all over our code and are actually syntactic sugar for (the same thing as) the above aliasing line. Rather than re-aliasing fib to the decorated fib, we can use annotations at the point of writing the fib function definition.
72 |An annotated fib function would look like this. Note the simple use of @ and the decorator name above the definition.
73 |@memoize
74 | def fib(n):
75 | if n<=0:
76 | return 0
77 | elif n==1:
78 | return 1
79 | else:
80 | return fib(n-2) + fib(n-1)
81 | Simple hey! So annotations are just stylish and helpful ways to decorate functions at the place of definition. This really helps when you're sharing code and working as a small team, because you don't have to look all over the code to see if the function has been re-aliased and decorated; it's right above the definition.
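A quick way to convince yourself the decoration works is to count how many times the undecorated body actually runs (a sketch reusing the memoize function from above; the counter is mine, not part of the original post):

```python
def memoize(f):
    cache = {}

    def memf(*args, **kw):
        key = (args, tuple(sorted(kw.items())))
        if key not in cache:
            cache[key] = f(*args, **kw)
        return cache[key]
    return memf


calls = 0


@memoize
def fib(n):
    global calls
    calls += 1  # counts executions of the body, not cache hits
    if n <= 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fib(n - 2) + fib(n - 1)


result = fib(30)
print(result, calls)  # the body runs once per distinct n: 31 calls instead of millions
```

Because the recursive calls go through the decorated name fib, every sub-result is cached too; each fib(k) for k in 0..30 is computed exactly once.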
83 |One of the best uses of this type of decoration using annotations is to log the performance of a function or to perform some detailed profiling. You only need write a single decorator to modify and wrap any function, and then you just sprinkle the decorator around your code as annotations depending on which functions you want to time, profile or investigate in detail.
84 |As I mentioned before, there is also a class style of writing decorators; let's use our memoize decorator as an example.
85 |Written as a class the decorator is:
86 |class Memoize:
87 |
88 | def __init__(self, f):
89 | self.f = f
90 | self.cache = {}
91 |
92 | def __call__(self, *args, **kw):
93 | key = (args, tuple(sorted(kw.items())))
94 | if not key in self.cache:
95 | self.cache[key] = self.f(*args, **kw)
96 | return self.cache[key]
97 | The class has to have two methods to operate as a decorator: __init__ and __call__. Some people find this easier to read and construct; others prefer the function style. I think it really depends on how advanced the decorator is going to be.
99 |The class style can then be applied in the exact same way as the above function style decorator.
100 |fib = Memoize(fib)
101 |
102 | @Memoize
103 | def fib(n):
104 | if n<=0:
105 | return 0
106 | ...
107 | I hope this has helped you understand the basics of decorators and annotations. All of the decorator code listed above can be found in the hacks repo on my github account here
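As a final check, the class-style Memoize above can be exercised end to end (a self-contained sketch of the class from this post):

```python
class Memoize:
    def __init__(self, f):
        self.f = f
        self.cache = {}

    def __call__(self, *args, **kw):
        key = (args, tuple(sorted(kw.items())))
        if key not in self.cache:
            self.cache[key] = self.f(*args, **kw)
        return self.cache[key]


@Memoize
def fib(n):
    if n <= 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fib(n - 2) + fib(n - 1)


print(fib(50))  # 12586269025, returned near-instantly thanks to the cache
```

Note that after decoration fib is an instance of Memoize, and calling it invokes __call__; the recursion inside the original function body hits the cache just as in the function-style version.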
109 | 110 | 111 | 112 |