├── .gitignore ├── README.txt ├── bak ├── Makefile ├── atchange.py ├── combine.py ├── day1.txt ├── day2-outline.txt ├── day3-outline.txt ├── llnl-blurb.txt ├── llnl-class-iter2.txt ├── llnl-class-iter3.txt └── llnl-class.txt ├── code ├── import │ ├── mod_a.py │ └── mod_b.py └── tests │ ├── test_sort.py │ └── test_sort2.py ├── course-outline.txt ├── data ├── commented-data.txt ├── keyvalue.txt └── listfile.txt ├── library-tour.txt ├── publish ├── Makefile ├── c++-wrapping.txt ├── code.tar.gz ├── code │ ├── README.txt │ ├── ctypes │ │ ├── Makefile │ │ ├── hello.c │ │ ├── hello.h │ │ └── hello_mod.py │ ├── hello │ │ ├── hello.c │ │ ├── hello.h │ │ ├── hellomodule.c │ │ ├── run.py │ │ └── setup.py │ ├── parallelpython │ │ └── calc_primes.py │ ├── profiling │ │ ├── count.py │ │ ├── hotshot.prof │ │ ├── profile1.py │ │ ├── run-cprofile │ │ ├── run-hotshot │ │ ├── run-statprof │ │ ├── run-timeit │ │ └── statprof.py │ ├── psyco │ │ └── psyco-test.py │ ├── pyrex-hello │ │ ├── hello.c │ │ ├── hello.h │ │ ├── hellomodule.pyx │ │ ├── run.py │ │ └── setup.py │ ├── pyrex-primes │ │ ├── primes.pyx │ │ ├── run-primes.py │ │ └── setup.py │ ├── rpy │ │ ├── do-pca.py │ │ ├── gen-vectors.py │ │ └── vectors.txt │ ├── run-all.sh │ ├── sip │ │ ├── Makefile │ │ ├── configure.py │ │ ├── hello.c │ │ ├── hello.h │ │ ├── hello.o │ │ ├── hello.sip │ │ ├── make-libhello.sh │ │ └── run.py │ └── swig-demo │ │ ├── Makefile │ │ ├── run.py │ │ ├── setup.py │ │ ├── swigdemo.cc │ │ ├── swigdemo.hh │ │ ├── swigdemo.i │ │ └── swigdemo.py ├── combine.py ├── conf.py ├── data │ ├── commented-data.txt │ ├── keyvalue.txt │ └── listfile.txt ├── day1.txt ├── day2.txt ├── day3.txt ├── gui-gossip.txt ├── header.txt ├── idiomatic-python-2.txt ├── idiomatic-python.txt ├── increasing-performance.txt ├── index.txt ├── multiprocessing.txt ├── new-style-classes.txt ├── nose-intro.txt ├── online-resources.txt ├── pyparsing-presentation.txt ├── python-30.txt ├── run-doctests.py ├── structuring-python.txt ├── 
testing-python.txt ├── tools.txt └── useful-packages.txt ├── run-doctests.py ├── web-presentation.txt └── xml-parsing.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *~ 3 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | Welcome! You have stumbled upon the class handouts for a course I 2 | taught at Lawrence Livermore National Lab, June 12-June 14, 2007. 3 | 4 | See ReadTheDocs 5 | 6 | http://intermediate-and-advanced-software-carpentry.readthedocs.org/en/latest/ 7 | 8 | for an up-to-date HTML version produced from the master github repository, 9 | 10 | https://github.com/ctb/advanced-swc 11 | -------------------------------------------------------------------------------- /bak/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | 3 | day3: 4 | python combine.py c++-wrapping.txt multiprocessing.txt useful-packages.txt new-style-classes.txt gui-gossip.txt python-30.txt > day3.txt 5 | rst2html.py day3.txt day3.html 6 | 7 | day2: 8 | python combine.py idiomatic-python-2.txt increasing-performance.txt tools.txt pyparsing-presentation.txt online-resources.txt > day2.txt 9 | rst2html.py day2.txt day2.html 10 | 11 | day1: 12 | python combine.py idiomatic-python.txt structuring-python.txt testing-python.txt nose-intro.txt > day1.txt 13 | rst2html.py day1.txt day1.html 14 | 15 | %.html : %.txt 16 | @./run-doctests.py $< 17 | @rst2html.py $< $@ 18 | -------------------------------------------------------------------------------- /bak/atchange.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | import sys, os.path, time, os 3 | 4 | changedict = {} 5 | for filename in sys.argv[1:-1]: 6 | changedict[filename] = os.path.getmtime(filename) 7 | 8 | cmd = sys.argv[-1] 9 | 10 | while 1: 11 | changed = False 12 | for filename, t in changedict.items(): 13 | mt = os.path.getmtime(filename) 14 | if t != mt: 15 | changedict[filename] = mt 16 | changed = True 17 | 18 | if changed: 19 | print 'CHANGE DETECTED' 20 | os.system(cmd) 21 | continue 22 | 23 | try: 24 | time.sleep(1) 25 | except KeyboardInterrupt: 26 | break 27 | 28 | -------------------------------------------------------------------------------- /bak/combine.py: -------------------------------------------------------------------------------- 1 | import sys 2 | print '.. Contents::' 3 | print '' 4 | 5 | for i in sys.argv[1:]: 6 | print open(i).read() 7 | print '' 8 | -------------------------------------------------------------------------------- /bak/day1.txt: -------------------------------------------------------------------------------- 1 | .. 
Contents:: 2 | 3 | -------------------------------------------------------------------------------- /bak/day2-outline.txt: -------------------------------------------------------------------------------- 1 | Testing: Unit and Functional Tests 2 | ================================== 3 | 4 | testing-python.txt 5 | 6 | Testing and Maintenance 7 | ======================= 8 | 9 | Some More Idiomatic Python 10 | ========================== 11 | 12 | idiomatic-python-2.txt 13 | 14 | Profiling and Performance 15 | ========================= 16 | 17 | - profiling and performance (psyco, pyrex) 18 | - hotshot, the confused state of profilers in Python 19 | - multiprocessor stuff, event-based stuff 20 | 21 | (afternoon) 22 | 23 | Extending with C/C++ 24 | ==================== 25 | 26 | simplest possible extension 27 | "it all comes down to parsetuple" 28 | example: khmer 29 | threading and the GIL revisited 30 | 31 | Extending with C/C++: Wrapping your code automatically 32 | 33 | Tools to help you work 34 | ====================== 35 | 36 | tools.txt 37 | 38 | Simple PDB 39 | ---------- 40 | 41 | Modules in the stdlib 42 | ===================== 43 | 44 | library-tour.txt 45 | 46 | Online Resources for Python 47 | =========================== 48 | 49 | online-resources.txt 50 | 51 | -------------------------------------------------------------------------------- /bak/day3-outline.txt: -------------------------------------------------------------------------------- 1 | c/c++ <-> Python 2 | ---------------- 3 | 4 | c++-wrapping.txt 5 | 6 | Multiprocessing options 7 | ----------------------- 8 | 9 | multiprocessing.txt 10 | 11 | Miscellaneous modules 12 | --------------------- 13 | 14 | useful-packages.txt 15 | 16 | Idiomatic Python Take 3: new-style classes, and metaclasses ;) 17 | -------------------------------------------------------------- 18 | 19 | new-style-classes.txt 20 | 21 | === break === 22 | 23 | gui-gossip.txt 24 | 25 | web-presentation.txt 26 | 27 | xml-parsing.txt 28 
| 29 | python-30.txt 30 | -------------------------------------------------------------------------------- /bak/llnl-blurb.txt: -------------------------------------------------------------------------------- 1 | Intermediate and Advanced Software Carpentry with Python 2 | 3 | What you will learn: 4 | 5 | * how to use and extend builtin advanced types in Python; 6 | * how to lay out code for ease of maintenance, reusability, and testability; 7 | * how to profile for performance bottlenecks and improve performance with 8 | extensions and threading; 9 | * how to start using the wide variety of external packages that are 10 | useful for scientists, including plotting and data analysis tools 11 | such as matplotlib, SciPy, IDLE, MPI, and Rpy; 12 | * how to make your data more accessible to yourself and others with 13 | databases and Web presentation tools; 14 | 15 | Course benefits: 16 | 17 | The Python programming language contains an immense number of features 18 | that are extraordinarily useful to scientific programmers and readily 19 | accessible to intermediate level developers. This course will provide 20 | an introduction to many of these features, focusing on those that will 21 | make your Python programs more maintainable, testable, accurate, and 22 | faster. This course will also introduce a number of third-party 23 | packages for development, plotting, and data-analysis that are 24 | particularly useful to scientists. 25 | 26 | Who should attend: 27 | 28 | Scientists who use Python for data processing, data analysis, data 29 | presentation, data management, or working with external code and 30 | libraries. An introductory knowledge of Python is assumed, as are 31 | basic concepts in object-oriented programming. 32 | 33 | Hands-on training: 34 | 35 | Exercises throughout this course offer immediate, hands-on reinforcement 36 | of the ideas you are learning.
Exercises include: 37 | 38 | * recipes for interacting with advanced Python builtin types; 39 | * refactoring example programs for better code reuse and testing; 40 | * writing unit tests, doc tests, and functional tests for existing code; 41 | * enhancing data processing performance with psyco, pyrex, and C extensions; 42 | * refactoring C extension code to support multithreading; 43 | * graphing data in matplotlib; 44 | * working with MPI in Python; 45 | * practical work with the IDLE IDE; 46 | * interacting with a large database via the Web; 47 | * building a simple graphical interface for data analysis; 48 | 49 | -------------------------------------------------------------------------------- /bak/llnl-class-iter2.txt: -------------------------------------------------------------------------------- 1 | ======================================================== 2 | Intermediate and Advanced Software Carpentry with Python 3 | ======================================================== 4 | 5 | Teaching philosophy 6 | =================== 7 | 8 | Light coverage of background & structure of topics, with lots of demos 9 | and hands-on experience. The overall goal is to prepare students to 10 | forge ahead on their own, yet give them enough material to point them 11 | in the right direction. 12 | 13 | Outline 14 | ======= 15 | 16 | Three days: plan for 3-4 hours lectures, 2-3 hours hands-on 17 | interspersed, plus breaks. 18 | 19 | Day 1 20 | ----- 21 | 22 | Goal: Ensure that participants understand how to build re-usable Python 23 | code & design for re-use and maintenance. 24 | 25 | Morning: assessment of level & "fun stuff" 26 | 27 | * intermediate & advanced features of the Python language: tuples, 28 | lists, and dictionaries; functions, exceptions, & inheritance; 29 | private methods and variables; generators and iterators; list 30 | comprehension; defining new "-like" types (list-like, 31 | dict-like). 
32 | 33 | * building Python programs and laying out packages: module globals, 34 | classes vs modules; single vs multiple files; libraries vs 35 | executables; some simple naming schemes. 36 | 37 | Afternoon: 38 | 39 | * writing for reusability: classes, modules, packages. Decomposing 40 | functionality, and what structures are appropriate. 41 | 42 | * maintaining Python codebases and testing: unittest, doctest, the 43 | point of testing. Using code coverage analyses to target tests. 44 | Building domain specific languages for testing/extensibility 45 | purposes. The "developer lifecycle", refactoring, and regression 46 | tests (*when did my program change!?*). pdb/tracebacks for 47 | debugging. 48 | 49 | * the stdlib -- useful modules. (whole laundry list here!) 50 | 51 | * a brief intro to extending Python with C/C++; creating/maintaining 52 | mixed codebases. 53 | 54 | * Threading in Python and the Global Interpreter Lock. Practical 55 | examples of why this matters. 56 | 57 | * What's coming in 3.0. 58 | 59 | This day will be devoted to exploring people's knowledge about Python, 60 | and can be adjusted dynamically to provide more basic or more advanced 61 | information. 62 | 63 | Day 2 64 | ----- 65 | 66 | Goal: Introduce participants to the variety of (excellent!) tools for 67 | working with Python, esp in science. 68 | 69 | Tools: 70 | 71 | * Wrapping C/C++ code automatically: SWIG, SIP, Boost examples. 72 | 73 | * UNIX tools to help you develop and collaborate: screen, VNC 74 | * IPython -- a nifty way to interact with Python. 75 | * NumPy/SciPy -- scientific extensions to Python. 76 | * pyMPI -- Python interface to the MPI. 77 | * Rpy -- accessing the R package & libraries from Python 78 | * matplotlib -- an incredibly nice package for plotting 79 | * Using IDLE to write Python code 80 | * Trac project management: intro & configuration. Using Trac effectively. 
81 | * Using the subprocess module 82 | 83 | This day will briefly explore the variety of tools for effectively 84 | working with and reaching out from Python. The general format will 85 | be to briefly introduce each tool, demonstrate the interface, and 86 | provide a good starting point for further exploration. (This is a lot 87 | of tools, but I can provide some good hardcopy demos and walk 88 | them through the initial examples.) 89 | 90 | Day 3 91 | ----- 92 | 93 | Goal: Provide hands-on experience with automatically producing static 94 | and interactive views of your data and analysis results. 95 | 96 | Databases, data analysis, and data presentation 97 | 98 | * A brief intro to Tkinter, a built-in graphical interface system. 99 | 100 | * Building static HTML output from your analyses -- an effective way 101 | to present and explore your own data. 102 | * Building dynamic HTML output with CGI, and why it's not a good idea. 103 | * Using CherryPy to build a lightweight yet dynamic Web site (better idea!) 104 | 105 | * Storing data in a structured manner 106 | - shelve/bsddb 107 | - Durus, a simple object database 108 | - SQLite/MySQL/PostgreSQL (simple examples only) 109 | 110 | * Digressions on client/server programming and event-driven programming, 111 | and how to do them without driving yourself insane. 112 | 113 | * Tying a database into your Web server and data analysis pipeline. 114 | 115 | * Building remote interfaces to your data (?? is this of interest? I find 116 | it incredibly useful myself, but it can be very complex) 117 | 118 | * All you will ever need to know about XML programming (as opposed to 119 | *design*) - SAX vs DOM. (30 minutes at the end.) 120 | 121 | This day will introduce people to effective techniques for data storage 122 | and presentation with Python. 123 | 124 | Comments/Questions: 125 | 126 | * I find XML to be not so useful, as it's mainly a way to encode data 127 | in an unambiguous yet difficult to transmit manner. 
Is there a 128 | particular reason to cover it in depth? (Most of the complexity is 129 | in designing XML minilanguages, not in outputting or parsing it, which 130 | can be covered fairly quickly.) 131 | 132 | * Not sure where to fit the stdlib in. Right now it's in day 1, where 133 | it belongs -- but there's a lot of other stuff in there. 134 | 135 | * I can expand or contract day 2, but I think the current list of topics 136 | is pretty important and could take all day as-is. 137 | 138 | * Day 3 will be fun ;) 139 | -------------------------------------------------------------------------------- /bak/llnl-class-iter3.txt: -------------------------------------------------------------------------------- 1 | ======================================================== 2 | Intermediate and Advanced Software Carpentry with Python 3 | ======================================================== 4 | 5 | Teaching philosophy 6 | =================== 7 | 8 | Light coverage of background & structure of topics, with lots of demos 9 | and hands-on experience. The overall goal is to prepare students to 10 | forge ahead on their own, yet give them enough material to point them 11 | in the right direction. 12 | 13 | Outline 14 | ======= 15 | 16 | Three days: plan for 3-4 hours lectures, 2-3 hours hands-on 17 | interspersed, plus breaks. I will be available to consult 18 | with individuals and groups about specific issues (incl. 19 | source code) over lunch & dinner. 20 | 21 | Day 1 22 | ----- 23 | 24 | Goal: Ensure that participants understand how to build re-usable Python 25 | code & design for re-use and maintenance. 
26 | 27 | Morning: assessment of level & "fun stuff" 28 | 29 | * brief motivating discussion (program creatively & know your tools) 30 | 31 | * intermediate & advanced features of the Python language: tuples, 32 | lists, and dictionaries; functions, exceptions, & inheritance; 33 | private methods and variables; generators and iterators; list 34 | comprehension; defining new "-like" types (list-like, 35 | dict-like). 36 | 37 | * building Python programs and laying out packages: module globals, 38 | classes vs modules; single vs multiple files; libraries vs 39 | executables; some simple naming schemes. 40 | 41 | Afternoon: 42 | 43 | * writing for reusability: classes, modules, packages. Decomposing 44 | functionality, and what structures are appropriate. 45 | 46 | (Reusability of code is a productivity multiplier, and reduces 47 | bug incidence significantly.) 48 | 49 | * maintaining Python codebases and testing: unittest/nose, doctest, the 50 | point of testing. Using code coverage analyses to target tests. 51 | Building domain specific languages for testing/extensibility 52 | purposes. The "developer lifecycle", refactoring, and regression 53 | tests (*when did my program change!?*). pdb/tracebacks for 54 | debugging. 55 | 56 | (Testing increases programmer productivity exponentially.) 57 | 58 | * profiling and performance enhancing modules (psyco, pyrex). 59 | 60 | * a brief intro to extending Python with C/C++; creating/maintaining 61 | mixed codebases. 62 | 63 | * useful modules in the stdlib. 64 | 65 | This day will be devoted to exploring people's knowledge about Python, 66 | and can be adjusted dynamically to provide more basic or more advanced 67 | information. 68 | 69 | Day 2 70 | ----- 71 | 72 | Goal: Introduce participants to the variety of (excellent!) tools for 73 | working with Python, esp in science. 74 | 75 | Tools: 76 | 77 | * useful modules in the stdlib (continued). 78 | 79 | * Wrapping C/C++ code automatically: SWIG, SIP, Boost examples. 
80 | 81 | * Threading in Python and the Global Interpreter Lock. Practical 82 | examples of why this matters. 83 | 84 | * UNIX tools to help you develop and collaborate: screen, VNC 85 | * IPython -- a nifty way to interact with Python. 86 | * NumPy/SciPy -- scientific extensions to Python. 87 | * pyMPI -- Python interface to the MPI. 88 | * Rpy -- accessing the R package & libraries from Python 89 | * matplotlib -- an incredibly nice package for plotting 90 | * Using IDLE to write Python code 91 | * Trac project management: intro & configuration. Using Trac effectively. 92 | * Using the subprocess module 93 | 94 | This day will briefly explore the variety of tools for effectively 95 | working with and reaching out from Python. The general format will 96 | be to briefly introduce each tool, demonstrate the interface, and 97 | provide a good starting point for further exploration. 98 | 99 | Day 3 100 | ----- 101 | 102 | Goal: Provide hands-on experience with automatically producing static 103 | and interactive views of your data and analysis results. 104 | 105 | Data presentation and databases. 106 | 107 | * A brief intro to Tkinter, a built-in graphical interface system. 108 | 109 | * Building static HTML output from your analyses -- an effective way 110 | to present and explore your own data. 111 | * Building dynamic HTML output with CGI, and why it's not a good idea. 112 | * Using CherryPy to build a lightweight yet dynamic Web site (a better idea!) 113 | - **note** discuss security issues 114 | 115 | * Storing data in a structured manner 116 | - shelve/bsddb 117 | - Durus, a simple object database 118 | - SQLite/MySQL/PostgreSQL (simple examples only) 119 | 120 | * Client/server programming and event-driven programming. 121 | 122 | * Tying a database into your Web server and data analysis pipeline. 123 | 124 | * Building remote interfaces to your data with XML-RPC (brief). 125 | 126 | * XML parsing and the Document Object Model (DOM). 
127 | 128 | * What's coming in 3.0. 129 | 130 | This day will introduce people to effective techniques for data storage 131 | and presentation with Python. 132 | -------------------------------------------------------------------------------- /bak/llnl-class.txt: -------------------------------------------------------------------------------- 1 | ======================================================== 2 | Intermediate and Advanced Software Carpentry with Python 3 | ======================================================== 4 | 5 | Outline 6 | ======= 7 | 8 | Three days: plan for three hours instruction, three hours hands-on, plus 9 | breaks. 10 | 11 | Day 1 12 | ----- 13 | 14 | Goal: Ensure that participants understand how to build re-usable Python 15 | code & design for re-use and maintenance. 16 | 17 | Straight Python: 18 | 19 | * building Python programs and laying out packages 20 | * writing for reusability 21 | * maintaining Python codebases & testing 22 | * advanced features of the Python language 23 | * a brief intro to extending Python with C/C++ 24 | 25 | This day will be devoted to exploring people's knowledge about Python, 26 | and can be adjusted dynamically to provide more basic or more advanced 27 | information. 28 | 29 | Day 2 30 | ----- 31 | 32 | Goal: Introduce participants to the variety of (excellent!) tools for 33 | working with Python, esp in science. 34 | 35 | Tools 36 | 37 | * Wrapping C/C++ code automatically 38 | * NumPy/SciPy 39 | * Rpy, matplotlib: tools for plotting 40 | * UNIX tools to help you develop and collaborate: screen, VNC 41 | * IDLE/IDEs 42 | * Centralized and distributed version control 43 | * Trac project management 44 | * IPython interactive Python interpreter 45 | 46 | This day will explore the variety of tools for effectively working 47 | with and reaching out from Python. 
48 | 49 | Day 3 50 | ----- 51 | 52 | Goal: Provide hands-on experience with automatically producing static 53 | and interactive views of your data and analysis results. 54 | 55 | Databases, data analysis, and data presentation 56 | 57 | * Storing data in a structured manner 58 | * Built-in Python options (shelve/bsddb) 59 | * Using SQL 60 | - SQLite 61 | - MySQL/PostgreSQL 62 | * Building static HTML output 63 | * Building dynamic HTML output with CGI/CherryPy 64 | * Tying the database into your Web server 65 | * Testing your Web stuff 66 | 67 | This day will introduce people to effective techniques for data storage 68 | and presentation with Python. 69 | 70 | (A whole day might be needed because of the variety of topics: both 71 | HTML and SQL must be introduced!) 72 | 73 | The menu of topics 74 | =================== 75 | 76 | Building reusable code: 77 | 78 | - modules, globals vs locals, import issues 79 | - PYTHONPATH 80 | - building/installing packages: distutils, easy_install, 'require' 81 | 82 | Testing 83 | 84 | - doctests, unittests, test fixtures 85 | - more advanced unit testing tools: nose/py.test 86 | - code coverage/figleaf 87 | 88 | Simple database stuff 89 | 90 | - pickling 91 | - bsddb/shelve 92 | - SQL, sqlite, and MySQL/PostgreSQL 93 | - Durus/ZODB: object databases 94 | 95 | Docstrings and automatic generation of documentation 96 | 97 | Building Python interfaces to C and C++ code 98 | 99 | - writing simple interfaces manually is easy 100 | - SWIG, Boost.Python, SIP: examples & tradeoffs 101 | - 102 | - testing C code from Python 103 | 104 | Java/Jython 105 | 106 | .NET/Mono/IronPython 107 | 108 | NumPy/SciPy 109 | 110 | matplotlib, a matlab-type Python graphing/display system 111 | 112 | Rpy, Python interface to R 113 | 114 | Generators, iterators, yield, list/generator comprehensions 115 | 116 | The lesser known (but useful!)
corners of the Python stdlib 117 | 118 | File management and APIs: how to deal nicely with paths, data files, 119 | etc. 120 | 121 | Using subprocess to flexibly execute external programs. 122 | 123 | IPython interactive Python prompt 124 | 125 | Another way to develop: scripting with two windows 126 | 127 | XML parsing 128 | 129 | Generating HTML for analysis summary and presentation 130 | 131 | The logging package: logging and py.logging 132 | 133 | Python interfaces to MPI 134 | 135 | Concurrency and threading in Python: threading vs fork vs...; the 136 | Global Interpreter Lock 137 | 138 | py.lib sshexec, a flexible way to run programs on multiple computers 139 | 140 | How Python is developed and how to think about backwards/forwards compatibility 141 | 142 | IDEs: IDLE 143 | 144 | Building simple Web servers (with CherryPy, probably? Or CGI.) 145 | 146 | A brief introduction to GUI development in Python. 147 | 148 | UNIX tricks: screen, VNC 149 | 150 | pdb, the Python debugger 151 | 152 | Building your own types: using dicts and lists as interfaces to your own 153 | data; advanced dictionary use. 154 | 155 | Version control with subversion, darcs, bzr-ng 156 | 157 | Project, ticket, and timeline management with Trac. 158 | -------------------------------------------------------------------------------- /code/import/mod_a.py: -------------------------------------------------------------------------------- 1 | print 'at top of mod_a' 2 | import mod_b 3 | print 'mod_a: defining x' 4 | x = 5 5 | -------------------------------------------------------------------------------- /code/import/mod_b.py: -------------------------------------------------------------------------------- 1 | print 'at top of mod_b' 2 | import mod_a 3 | print 'mod_b: defining y' 4 | y = mod_a.x 5 | -------------------------------------------------------------------------------- /code/tests/test_sort.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | import unittest 3 | class Test(unittest.TestCase): 4 | def test_me(self): 5 | seq = [ 5, 4, 1, 3, 2 ] 6 | seq.sort() 7 | self.assertEqual(seq, [1, 2, 3, 4, 5]) 8 | 9 | unittest.main() 10 | -------------------------------------------------------------------------------- /code/tests/test_sort2.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | import unittest 3 | import random 4 | 5 | class Test(unittest.TestCase): 6 | def setUp(self): 7 | self.seq = range(0, 10) 8 | random.shuffle(self.seq) 9 | 10 | def tearDown(self): 11 | del self.seq 12 | 13 | def test_basic_sort(self): 14 | self.seq.sort() 15 | self.assertEqual(self.seq, range(0, 10)) 16 | 17 | def test_reverse(self): 18 | self.seq.sort() 19 | self.seq.reverse() 20 | self.assertEqual(self.seq, [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) 21 | 22 | def test_destruct(self): 23 | self.seq.sort() 24 | del self.seq[-1] 25 | self.assertEqual(self.seq, range(0, 9)) 26 | 27 | unittest.main() 28 | -------------------------------------------------------------------------------- /course-outline.txt: -------------------------------------------------------------------------------- 1 | ======================================================== 2 | Intermediate and Advanced Software Carpentry with Python 3 | ======================================================== 4 | 5 | Teaching philosophy 6 | =================== 7 | 8 | Light coverage of background & structure of topics, with lots of demos 9 | and hands-on experience. The overall goal is to prepare students to 10 | forge ahead on their own, yet give them enough material to point them 11 | in the right direction. 12 | 13 | Outline 14 | ======= 15 | 16 | Three days: plan for 3-4 hours lectures, 2-3 hours hands-on 17 | interspersed, plus breaks. I will be available to consult 18 | with individuals and groups about specific issues (incl. 19 | source code) over lunch & dinner. 
20 | 21 | Day 1 22 | ----- 23 | 24 | Goal: Ensure that participants understand how to build re-usable Python 25 | code & design for re-use and maintenance. 26 | 27 | Morning: assessment of level & "fun stuff" 28 | 29 | * brief motivating discussion (program creatively & know your tools) 30 | 31 | * intermediate & advanced features of the Python language: tuples, 32 | lists, and dictionaries; functions, exceptions, & inheritance; 33 | private methods and variables; generators and iterators; list 34 | comprehension; defining new "-like" types (list-like, 35 | dict-like). 36 | 37 | * building Python programs and laying out packages: module globals, 38 | classes vs modules; single vs multiple files; libraries vs 39 | executables; some simple naming schemes. 40 | 41 | Afternoon: 42 | 43 | * writing for reusability: classes, modules, packages. Decomposing 44 | functionality, and what structures are appropriate. 45 | 46 | (Reusability of code is a productivity multiplier, and reduces 47 | bug incidence significantly.) 48 | 49 | * maintaining Python codebases and testing: unittest/nose, doctest, the 50 | point of testing. Using code coverage analyses to target tests. 51 | Building domain specific languages for testing/extensibility 52 | purposes. The "developer lifecycle", refactoring, and regression 53 | tests (*when did my program change!?*). pdb/tracebacks for 54 | debugging. 55 | 56 | (Testing increases programmer productivity exponentially.) 57 | 58 | * profiling and performance enhancing modules (psyco, pyrex). 59 | 60 | * a brief intro to extending Python with C/C++; creating/maintaining 61 | mixed codebases. 62 | 63 | * useful modules in the stdlib. 64 | 65 | This day will be devoted to exploring people's knowledge about Python, 66 | and can be adjusted dynamically to provide more basic or more advanced 67 | information. 68 | 69 | Day 2 70 | ----- 71 | 72 | Goal: Introduce participants to the variety of (excellent!) 
tools for 73 | working with Python, esp in science. 74 | 75 | Tools: 76 | 77 | * useful modules in the stdlib (continued). 78 | 79 | * Wrapping C/C++ code automatically: SWIG, SIP, Boost examples. 80 | 81 | * Threading in Python and the Global Interpreter Lock. Practical 82 | examples of why this matters. 83 | 84 | * UNIX tools to help you develop and collaborate: screen, VNC 85 | * IPython -- a nifty way to interact with Python. 86 | * NumPy/SciPy -- scientific extensions to Python. 87 | * pyMPI -- Python interface to the MPI. 88 | * Rpy -- accessing the R package & libraries from Python 89 | * matplotlib -- an incredibly nice package for plotting 90 | * Using IDLE to write Python code 91 | * Trac project management: intro & configuration. Using Trac effectively. 92 | * Using the subprocess module 93 | 94 | This day will briefly explore the variety of tools for effectively 95 | working with and reaching out from Python. The general format will 96 | be to briefly introduce each tool, demonstrate the interface, and 97 | provide a good starting point for further exploration. 98 | 99 | Day 3 100 | ----- 101 | 102 | Goal: Provide hands-on experience with automatically producing static 103 | and interactive views of your data and analysis results. 104 | 105 | Data presentation and databases. 106 | 107 | * A brief intro to Tkinter, a built-in graphical interface system. 108 | 109 | * Building static HTML output from your analyses -- an effective way 110 | to present and explore your own data. 111 | * Building dynamic HTML output with CGI, and why it's not a good idea. 112 | * Using CherryPy to build a lightweight yet dynamic Web site (a better idea!) 113 | - **note** discuss security issues 114 | 115 | * Storing data in a structured manner 116 | - shelve/bsddb 117 | - Durus, a simple object database 118 | - SQLite/MySQL/PostgreSQL (simple examples only) 119 | 120 | * Client/server programming and event-driven programming. 
121 | 122 | * Tying a database into your Web server and data analysis pipeline. 123 | 124 | * Building remote interfaces to your data with XML-RPC (brief). 125 | 126 | * XML parsing and the Document Object Model (DOM). 127 | 128 | * What's coming in 3.0. 129 | 130 | This day will introduce people to effective techniques for data storage 131 | and presentation with Python. 132 | -------------------------------------------------------------------------------- /data/commented-data.txt: -------------------------------------------------------------------------------- 1 | # this is a comment or a header 2 | 1 3 | # another comment 4 | 2 5 | -------------------------------------------------------------------------------- /data/keyvalue.txt: -------------------------------------------------------------------------------- 1 | a 5 2 | b 6 3 | d 7 4 | a 2 5 | c 1 6 | -------------------------------------------------------------------------------- /data/listfile.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | c 4 | d 5 | -------------------------------------------------------------------------------- /library-tour.txt: -------------------------------------------------------------------------------- 1 | A Brief Scan of the Python Standard Library 2 | =========================================== 3 | 4 | Here are some modules that I thought might be useful to know about. 5 | 6 | optparse 7 | -------- 8 | 9 | Command-line option parsing, so that your scripts can take options, print 10 | help for the options, and otherwise behave like a nicely put-together script. 11 | 12 | First, import the module and create a parser object: 13 | 14 | >>> from optparse import OptionParser 15 | 16 | >>> parser = OptionParser() 17 | 18 | Now, add some options. The first option here says to take a filename as 19 | an option after either -f or --file; the second option sets the 20 | 'verbose' option to False when -q/--quiet is given (it defaults to True).
21 | 22 | >>> _ = parser.add_option("-f", "--file", dest="filename", 23 | ... help="write report to FILE", metavar="FILE") 24 | >>> _ = parser.add_option("-q", "--quiet", 25 | ... action="store_false", dest="verbose", default=True, 26 | ... help="don't print status messages to stdout") 27 | 28 | Now you can parse a dummy command line and retrieve the options as well 29 | as any following arguments: 30 | 31 | >>> test_cmdline = "dummy --quiet -f testfile arg1 arg2 arg3".split()[1:] 32 | 33 | >>> (options, args) = parser.parse_args(test_cmdline) 34 | >>> print options 35 | {'verbose': False, 'filename': 'testfile'} 36 | >>> print args 37 | ['arg1', 'arg2', 'arg3'] 38 | 39 | There's also a convenient help message printout: 40 | 41 | >>> try: 42 | ... parser.parse_args(["-h"]) 43 | ... except SystemExit: 44 | ... pass 45 | Usage: run-doctests.py [options] 46 | 47 | Options: 48 | -h, --help show this help message and exit 49 | -f FILE, --file=FILE write report to FILE 50 | -q, --quiet don't print status messages to stdout 51 | 52 | 53 | 54 | 55 | StringIO 56 | -------- 57 | 58 | difflib 59 | ------- 60 | 61 | pprint 62 | ------ 63 | 64 | weakref 65 | ------- 66 | 67 | itertools and functools 68 | ----------------------- 69 | 70 | csv 71 | --- 72 | 73 | ConfigParser 74 | ------------ 75 | 76 | os.path 77 | ------- 78 | 79 | tempfile 80 | -------- 81 | 82 | gzip 83 | ---- 84 | 85 | urllib 86 | ------ 87 | -------------------------------------------------------------------------------- /publish/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. 
The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/IntermediateandAdvancedSoftwareCarpentry.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/IntermediateandAdvancedSoftwareCarpentry.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/IntermediateandAdvancedSoftwareCarpentry" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/IntermediateandAdvancedSoftwareCarpentry" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 
104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /publish/c++-wrapping.txt: -------------------------------------------------------------------------------- 1 | Wrapping C/C++ for Python 2 | ========================= 3 | 4 | There are a number of options if you want to wrap existing C or C++ 5 | functionality in Python. 6 | 7 | Manual wrapping 8 | --------------- 9 | 10 | If you have a relatively small amount of C/C++ code to wrap, you can 11 | do it by hand. The `Extending and Embedding 12 | `__ section of the docs is a pretty 13 | good reference. 14 | 15 | When I write wrappers for C and C++ code, I usually provide a procedural 16 | interface to the code and then use Python to construct an object-oriented 17 | interface. I do things this way for two reasons: first, exposing C++ 18 | objects to Python is a pain; and second, I prefer writing higher-level 19 | structures in Python to writing them in C++. 20 | 21 | Let's take a look at a basic wrapper: we have a function 'hello' in a 22 | file 'hello.c'. 'hello' is defined like so: :: 23 | 24 | char * hello(char * what) 25 | 26 | To wrap this manually, we need to do the following. 27 | 28 | First, write a Python-callable function that takes in a string and returns 29 | a string. :: 30 | 31 | static PyObject * hello_wrapper(PyObject * self, PyObject * args) 32 | { 33 | char * input; 34 | char * result; 35 | PyObject * ret; 36 | 37 | // parse arguments 38 | if (!PyArg_ParseTuple(args, "s", &input)) { 39 | return NULL; 40 | } 41 | 42 | // run the actual function 43 | result = hello(input); 44 | 45 | // build the resulting string into a Python object. 46 | ret = PyString_FromString(result); 47 | free(result); 48 | 49 | return ret; 50 | } 51 | 52 | Second, register this function within a module's symbol table (all Python 53 | functions live in a module, even if they're actually C functions!) 
:: 54 | 55 | static PyMethodDef HelloMethods[] = { 56 | { "hello", hello_wrapper, METH_VARARGS, "Say hello" }, 57 | { NULL, NULL, 0, NULL } 58 | }; 59 | 60 | Third, write an init function for the module (all extension modules require 61 | an init function). :: 62 | 63 | DL_EXPORT(void) inithello(void) 64 | { 65 | Py_InitModule("hello", HelloMethods); 66 | } 67 | 68 | Fourth, write a setup.py script: :: 69 | 70 | from distutils.core import setup, Extension 71 | 72 | # the c++ extension module 73 | extension_mod = Extension("hello", ["hellomodule.c", "hello.c"]) 74 | 75 | setup(name = "hello", ext_modules=[extension_mod]) 76 | 77 | There are two aspects of this code that are worth discussing, even 78 | at this simple level. 79 | 80 | First, error handling: note the PyArg_ParseTuple call. That call 81 | is what tells Python that the 'hello' wrapper function takes precisely 82 | one argument, a string ("s" means "string"; "ss" would mean "two strings"; 83 | "si" would mean "string and integer"). The convention in the C API to Python 84 | is that a NULL return from a function that returns PyObject* indicates 85 | an error has occurred; in this case, the error information is set 86 | within PyArg_ParseTuple and we're just passing the error on up the stack 87 | by returning NULL. 88 | 89 | Second, references. Python works on a system of reference counting: 90 | each time a function "takes ownership" of an object (by, for example, 91 | assigning it to a list, or a dictionary) it increments that object's 92 | reference count by one using Py_INCREF. When the object is removed 93 | from use in that particular place (e.g. removed from the list or 94 | dictionary), the reference count is decremented with Py_DECREF. When 95 | the reference count reaches 0, Python knows that this object is not 96 | being used by anything and can be freed (it may not be freed immediately, 97 | however). 98 | 99 | Why does this matter? 
Well, we're creating a PyObject in this code, 100 | with PyString_FromString. Do we need to INCREF it? To find out, 101 | go take a look at the documentation for PyString_FromString: 102 | 103 | http://docs.python.org/api/stringObjects.html#l2h-461 104 | 105 | See where it says "New reference"? That means it's handing back an 106 | object with a reference count of 1, and that's what we want. If it 107 | had said "Borrowed reference", then we would need to INCREF the object 108 | before returning it, to indicate that we wanted the allocated memory to 109 | survive past the end of the function. 110 | 111 | Here's a way to think about references: 112 | 113 | - if you receive a Python object from the Python API, you can use it 114 | within your own C code without INCREFing it. 115 | 116 | - if you want to guarantee that the Python object survives past the 117 | end of your own C code, you must INCREF it. 118 | 119 | - if you received an object from Python code and it was a new reference, 120 | but you don't want it to survive past the end of your own C code, you 121 | should DECREF it. 122 | 123 | If you wanted to return None, by the way, you can use Py_None. Remember 124 | to INCREF it! 125 | 126 | Another note: during the class, I talked about using PyCObjects to 127 | pass opaque C/C++ data types around. This is useful if you are using 128 | Python to organize your code, but you have complex structures that you 129 | don't need to be Python-accessible. You can wrap pointers in 130 | PyCObjects (with an associated destructor, if so desired) at which 131 | point they become opaque Python objects whose memory is managed by the 132 | Python interpreter. You can see an example in the example code, under 133 | ``code/hello/hellomodule.c``, functions ``cobj_in``, ``cobj_out``, and 134 | ``free_my_struct``, which pass an allocated C structure back to Python 135 | using a PyCObject wrapper. 136 | 137 | So that's a brief introduction to how you wrap things by hand.
138 | 139 | As you might guess, however, there are a number of projects devoted 140 | to automatically wrapping code. Here's a brief introduction to some of 141 | them. 142 | 143 | .. CTB: talk about testing c code with python? 144 | .. Also pointers, deallocators. (khmer?) 145 | 146 | Wrapping C code with SWIG 147 | ------------------------- 148 | 149 | SWIG stands for "Simplified Wrapper and Interface Generator", and it is 150 | capable of wrapping C code for a large variety of languages. To quote, 151 | "SWIG is used with different types of languages including common 152 | scripting languages such as Perl, PHP, Python, Tcl and Ruby. The 153 | list of supported languages also includes non-scripting languages such 154 | as C#, Common Lisp (CLISP, Allegro CL, CFFI, UFFI), Java, Modula-3 and 155 | OCAML. Also several interpreted and compiled Scheme implementations 156 | (Guile, MzScheme, Chicken) are supported." 157 | 158 | Whew. 159 | 160 | But we only care about Python for now! 161 | 162 | SWIG is essentially a macro language that groks C code and can spit 163 | out wrapper code for your language of choice. 164 | 165 | You'll need three things for a SWIG wrapping of our 'hello' program. 166 | First, a Makefile: :: 167 | 168 | all: 169 | swig -python -c++ -o _swigdemo_module.cc swigdemo.i 170 | python setup.py build_ext --inplace 171 | 172 | This shows the steps we need to run: first, run SWIG to generate 173 | the C code extension; then run ``setup.py build_ext --inplace`` to actually build it. 174 | 175 | Second, we need a SWIG wrapper file, 'swigdemo.i'. In this case, it 176 | can be pretty simple: :: 177 | 178 | %module swigdemo 179 | 180 | %{ 181 | #include 182 | #include "hello.h" 183 | %} 184 | 185 | %include "hello.h" 186 | 187 | A few things to note: the %module specifies the name of the module 188 | to be generated from this wrapper file. The code between the 189 | %{ %} is placed, verbatim, in the C output file; in this case it 190 | just includes two header files.
And, finally, the last line, %include, 191 | just says "build your interface against the declarations in this header 192 | file". 193 | 194 | OK, and third, we will need a setup.py. This is virtually identical 195 | to the setup.py we wrote for the manual wrapping: :: 196 | 197 | from distutils.core import setup, Extension 198 | 199 | extension_mod = Extension("_swigdemo", ["_swigdemo_module.cc", "hello.c"]) 200 | 201 | setup(name = "swigdemo", ext_modules=[extension_mod]) 202 | 203 | Now, when we run 'make', swig will generate the _swigdemo_module.cc 204 | file, as well as a 'swigdemo.py' file; then, setup.py will compile the 205 | two C files together into a single shared library, '_swigdemo', which 206 | is imported by swigdemo.py; then the user can just 'import swigdemo' 207 | and have direct access to everything in the wrapped module. 208 | 209 | Note that swig can wrap most simple types "out of the box". It's only 210 | when you get into your own types that you will have to worry about providing 211 | what are called "typemaps"; I can show you some examples. 212 | 213 | I've also heard (from someone in the class) that SWIG is essentially 214 | not supported any more, so buyer beware. (I will also say that SWIG 215 | is pretty crufty. When it works and does exactly what you want, your 216 | life is good. Fixing bugs in it is messy, though, as is adding new 217 | features, because it's a template language, and hence many of the 218 | constructs are ad hoc.) 219 | 220 | Wrapping C code with pyrex 221 | -------------------------- 222 | 223 | pyrex, as I discussed yesterday, is a weird hybrid of C and Python 224 | that's meant for generating fast Python-esque code. I'm not sure I'd 225 | call this "wrapping", but ... here goes. 226 | 227 | First, write a .pyx file; in this case, I'm calling it 'hellomodule.pyx', 228 | instead of 'hello.pyx', so that I don't get confused with 'hello.c'. 
:: 229 | 230 | cdef extern from "hello.h": 231 | char * hello(char *s) 232 | 233 | def hello_fn(s): 234 | return hello(s) 235 | 236 | What the 'cdef' says is, "grab the symbol 'hello' from the file 237 | 'hello.h'". Then you just go ahead and define your 'hello_fn' as 238 | you would if it were Python. 239 | 240 | and... that's it. You've still got to write a setup.py, of course: :: 241 | 242 | from distutils.core import setup 243 | from distutils.extension import Extension 244 | from Pyrex.Distutils import build_ext 245 | 246 | setup( 247 | name = "hello", 248 | ext_modules=[ Extension("hellomodule", ["hellomodule.pyx", "hello.c"]) ], 249 | cmdclass = {'build_ext': build_ext} 250 | ) 251 | 252 | but then you can just run 'setup.py build_ext --inplace' and you'll be able 253 | to 'import hellomodule; hellomodule.hello_fn'. 254 | 255 | ctypes 256 | ------ 257 | 258 | In Python 2.5, the ctypes module is included. This module lets you 259 | talk directly to shared libraries on both Windows and UNIX, which is 260 | pretty darned handy. But can it be used to call our C code directly? 261 | 262 | The answer is yes, with a caveat or two. 263 | 264 | First, you need to compile 'hello.c' into a shared library. :: 265 | 266 | gcc -o hello.so -shared -fPIC hello.c 267 | 268 | Then, you need to tell the system where to find the shared library. :: 269 | 270 | export LD_LIBRARY_PATH=. 271 | 272 | Now you can load the library with ctypes: :: 273 | 274 | from ctypes import cdll 275 | 276 | hello_lib = cdll.LoadLibrary("hello.so") 277 | hello = hello_lib.hello 278 | 279 | So far, so good -- now what happens if you run it? :: 280 | 281 | >> print hello("world") 282 | 136040696 283 | 284 | Whoops! You still need to tell Python/ctypes what kind of return 285 | value to expect! 
In this case, we're expecting a char pointer: :: 286 | 287 | from ctypes import c_char_p 288 | hello.restype = c_char_p 289 | 290 | And now it will work: 291 | 292 | >> print hello("world") 293 | hello, world 294 | 295 | Voila! 296 | 297 | I should say that ctypes is not intended for this kind of wrapping, 298 | because of the whole LD_LIBRARY_PATH setting requirement. That is, 299 | it's really intended for accessing *system* libraries. But you can 300 | still use it for other stuff like this. 301 | 302 | SIP 303 | --- 304 | 305 | SIP is the tool used to generate Python bindings for Qt (PyQt), a graphics 306 | library. However, it can be used to wrap any C or C++ API. 307 | 308 | As with SWIG, you have to start with a definition file. In this case, 309 | it's pretty easy: just put this in 'hello.sip': :: 310 | 311 | %CModule hellomodule 0 312 | 313 | char * hello(char *); 314 | 315 | Now you need to write a 'configure' script: :: 316 | 317 | import os 318 | import sipconfig 319 | 320 | # The name of the SIP build file generated by SIP and used by the build 321 | # system. 322 | build_file = "hello.sbf" 323 | 324 | # Get the SIP configuration information. 325 | config = sipconfig.Configuration() 326 | 327 | # Run SIP to generate the code. 328 | os.system(" ".join([config.sip_bin, "-c", ".", "-b", build_file, "hello.sip"])) 329 | 330 | # Create the Makefile. 331 | makefile = sipconfig.SIPModuleMakefile(config, build_file) 332 | 333 | # Add the library we are wrapping. The name doesn't include any platform 334 | # specific prefixes or extensions (e.g. the "lib" prefix on UNIX, or the 335 | # ".dll" extension on Windows). 336 | makefile.extra_libs = ["hello"] 337 | makefile.extra_lib_dirs = ["."] 338 | 339 | # Generate the Makefile itself. 340 | makefile.generate() 341 | 342 | Now, run 'configure.py', and then run 'make' on the generated Makefile, 343 | and your extension will be compiled. 
344 | 345 | (At this point I should say that I haven't really used SIP before, and I 346 | feel like it's much more powerful than this example would show you!) 347 | 348 | Boost.Python 349 | ------------ 350 | 351 | If you are an expert C++ programmer and want to wrap a lot of C++ code, 352 | I would recommend taking a look at the Boost.Python library, which 353 | lets you run C++ code from Python, and Python code from C++, seamlessly. 354 | I haven't used it at all, and it's too complicated to cover in a short 355 | period! 356 | 357 | http://www.boost-consulting.com/writing/bpl.html 358 | 359 | Recommendations 360 | --------------- 361 | 362 | Based on my little survey above, I would suggest using SWIG to write 363 | wrappers for relatively small libraries, while SIP probably provides a 364 | more manageable infrastructure for wrapping large libraries (which I 365 | know I did not demonstrate!) 366 | 367 | Pyrex is astonishingly easy to use, and it may be a good option if you 368 | have a small library to wrap. My guess is that you would spend a lot 369 | of time converting types back and forth from C/C++ to Python, but I could 370 | be wrong. 371 | 372 | ctypes is excellent if you have a bunch of functions to run and you don't 373 | care about extracting complex data types from them: you just want to pass 374 | around the encapsulated data types between the functions in order to 375 | accomplish a goal. 376 | 377 | One or two more notes on wrapping 378 | --------------------------------- 379 | 380 | As I said at the beginning, I tend to write procedural interfaces to 381 | my C++ code and then use Python to wrap them in an object-oriented 382 | interface. This lets me adjust the OO structure of my code more 383 | flexibly; on the flip side, I only use the code from Python, so I 384 | really don't care what the C++ code looks like as long as it runs fast 385 | ;). 
So, you might find it worthwhile to invest in figuring out how to 386 | wrap things in a more object-oriented manner. 387 | 388 | Secondly, one of the biggest benefits I find from wrapping my C code in 389 | Python is that all of a sudden I can test it pretty easily. Testing is 390 | something you *do not* want to do in C, because you have to declare all 391 | the variables and stuff that you use, and that just gets in the way of 392 | writing simple tests. I find that once I've wrapped something in Python, 393 | it becomes much more testable. 394 | 395 | -------------------------------------------------------------------------------- /publish/code.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ctb/advanced-swc/af81ac874f6a714a27bc378a1e919695ac5c0da3/publish/code.tar.gz -------------------------------------------------------------------------------- /publish/code/README.txt: -------------------------------------------------------------------------------- 1 | This is the source code for my LLNL course on *Intermediate and Advanced 2 | Software Carpentry in Python*. 3 | 4 | Directories 5 | =========== 6 | 7 | Each directory contains the examples for a specific 8 | 9 | hello 10 | ----- 11 | 12 | A few simple examples of a hand-built C extension module for Python. 13 | 14 | To build & run the hello examples: :: 15 | 16 | python setup.py build 17 | python run.py 18 | 19 | ctypes 20 | ------ 21 | 22 | An example showing how to use the ctypes library module to execute 23 | C functions directly from dynamic libraries. 24 | 25 | To build the ctypes example: :: 26 | 27 | make 28 | LD_LIBRARY_CONFIG=. python hello_mod.py 29 | 30 | You may need to change the Makefile to put the right magic flags in to 31 | build a shared library on your platform. Flags for Linux & Mac OS X 32 | are already there. 
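The ctypes pattern used above works against any shared library, not just one you built yourself. As a sanity check that needs no compilation step at all, here is a sketch calling ``strlen`` from the C library itself (shown in Python 3 syntax, hence the byte strings; on POSIX systems, if ``find_library`` returns None, ``CDLL(None)`` falls back to searching the symbols already loaded into the running process):

```python
import ctypes
import ctypes.util

# locate and load the C library
libc = ctypes.CDLL(ctypes.util.find_library("c"))

# as with hello.so, declare the signature before trusting the result
libc.strlen.restype = ctypes.c_size_t
libc.strlen.argtypes = [ctypes.c_char_p]

assert libc.strlen(b"world") == 5
```

Without the ``restype``/``argtypes`` declarations you get the same kind of garbage result that the bare ``hello("world")`` call produced above.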
33 | 34 | parallelpython 35 | -------------- 36 | 37 | A simple example of using the parallelpython package 38 | (http://www.parallelpython.com/). 39 | 40 | To run the examples on two CPUs, install parallelpython and run: :: 41 | 42 | cd parallelpython 43 | python calc_primes.py 2 44 | 45 | profiling 46 | --------- 47 | 48 | Simple examples of using cProfile, hotshot, `statprof 49 | `__, and timeit to profile 50 | and time the execution of Python code. See the files run-*. 51 | 52 | psyco 53 | ----- 54 | 55 | Simple example of using psyco, a specializing compiler for Python. 56 | 57 | Go into the ``psyco/`` subdirectory and type :: 58 | 59 | python psyco-test.py 60 | 61 | pyrex-hello 62 | ----------- 63 | 64 | A simple example of using `pyrex `__ to wrap a C function ("Hello, world"). 65 | 66 | Execute: :: 67 | 68 | cd pyrex-hello/ 69 | python setup.py build_ext --inplace 70 | python run.py 71 | 72 | pyrex-primes 73 | ------------ 74 | 75 | A simple example of using `pyrex `__ to speed up Python. 76 | 77 | Execute: :: 78 | 79 | cd pyrex-primes/ 80 | python setup.py build_ext --inplace 81 | python run-primes.py 82 | 83 | rpy 84 | --- 85 | 86 | Driving `R `__ from Python using `rpy 87 | `__ to run and display a Principle Components 88 | Analysis. 89 | 90 | Execute: :: 91 | 92 | cd rpy/ 93 | python do-pca.py 94 | 95 | sip 96 | --- 97 | 98 | A simple example of using `SIP `__ 99 | to wrap C code in a Python extension module. 100 | 101 | Execute: :: 102 | 103 | cd sip/ 104 | bash ./make-libhello.sh 105 | python configure.py 106 | make 107 | python run.py 108 | 109 | swig-demo 110 | --------- 111 | 112 | A simple example of using SWIG to wrap a C module for Python. 
113 | 114 | Execute: :: 115 | 116 | cd swig-demo/ 117 | make 118 | python run.py 119 | -------------------------------------------------------------------------------- /publish/code/ctypes/Makefile: -------------------------------------------------------------------------------- 1 | LINUX=-shared 2 | MAC_OSX=-bundle -undefined dynamic_lookup 3 | 4 | hello.so: 5 | gcc -o hello.so $(MAC_OSX) -fPIC hello.c 6 | 7 | -------------------------------------------------------------------------------- /publish/code/ctypes/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | char * hello(char * name) 6 | { 7 | char * buf; 8 | buf = (char *) malloc(strlen("hello, ") + strlen(name) + 1); 9 | 10 | sprintf(buf, "hello, %s", name); 11 | return buf; 12 | } 13 | -------------------------------------------------------------------------------- /publish/code/ctypes/hello.h: -------------------------------------------------------------------------------- 1 | char * hello(char * name); 2 | -------------------------------------------------------------------------------- /publish/code/ctypes/hello_mod.py: -------------------------------------------------------------------------------- 1 | from ctypes import c_char_p, cdll 2 | 3 | hello_lib = cdll.LoadLibrary("hello.so") 4 | hello = hello_lib.hello 5 | print hello("world") # this won't work! 6 | 7 | # ==> need to set return type... 
8 | hello.restype = c_char_p 9 | assert hello("world") == "hello, world" 10 | -------------------------------------------------------------------------------- /publish/code/hello/hello.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "hello.h" 5 | 6 | char * hello(char * name) 7 | { 8 | char * buf; 9 | buf = (char *) malloc(strlen("hello, ") + strlen(name) + 1); 10 | 11 | printf("(in hello)\n"); 12 | sprintf(buf, "hello, %s", name); 13 | return buf; 14 | } 15 | -------------------------------------------------------------------------------- /publish/code/hello/hello.h: -------------------------------------------------------------------------------- 1 | char * hello(char * name); 2 | -------------------------------------------------------------------------------- /publish/code/hello/hellomodule.c: -------------------------------------------------------------------------------- 1 | #include "Python.h" 2 | #include "hello.h" 3 | 4 | typedef struct { 5 | int a; 6 | int b; 7 | char c[50]; 8 | } my_struct; 9 | 10 | static void free_my_struct(void * p) 11 | { 12 | printf("Let my_struct run free!\n"); 13 | my_struct * v = (my_struct *) p; 14 | free(v); 15 | } 16 | 17 | static PyObject * cobj_out(PyObject * self, PyObject * args) 18 | { 19 | PyObject * p; 20 | my_struct * v; 21 | 22 | if (!PyArg_ParseTuple(args, "O", &p)) { 23 | return NULL; 24 | } 25 | 26 | v = PyCObject_AsVoidPtr(p); 27 | printf("struct is %d, %d, %s\n", v->a, v->b, v->c); 28 | 29 | Py_INCREF(Py_None); 30 | return Py_None; 31 | } 32 | 33 | static PyObject * cobj_in(PyObject * self, PyObject * args) 34 | { 35 | if (!PyArg_ParseTuple(args, "")) { 36 | return NULL; 37 | } 38 | my_struct * x = malloc(sizeof(my_struct)); 39 | 40 | x->a = 15; 41 | x->b = 25; 42 | sprintf(x->c, "this is my string"); 43 | 44 | return PyCObject_FromVoidPtr(x, free_my_struct); 45 | } 46 | 47 | 48 | static PyObject * complex_return_type2(PyObject 
* self, PyObject * args) 49 | { 50 | int key_1 = 5; 51 | int key_2 = 6; 52 | char * key_3 = "this is my key"; 53 | 54 | char * value_1 = "this is value 1"; 55 | char * value_2 = "this is another value"; 56 | char * value_3 = "this is my final value"; 57 | 58 | if (!PyArg_ParseTuple(args, "")) { 59 | return NULL; 60 | } 61 | 62 | return Py_BuildValue("[iissss]", key_1, key_2, key_3, 63 | value_1, value_2, value_3); 64 | } 65 | 66 | 67 | static PyObject * complex_return_type(PyObject * self, PyObject * args) 68 | { 69 | int key_1 = 5; 70 | int key_2 = 6; 71 | char * key_3 = "this is my key"; 72 | 73 | char * value_1 = "this is value 1"; 74 | char * value_2 = "this is another value"; 75 | char * value_3 = "this is my final value"; 76 | 77 | if (!PyArg_ParseTuple(args, "")) { 78 | return NULL; 79 | } 80 | 81 | PyObject * my_dict = PyDict_New(); 82 | 83 | PyObject * key_1_o = PyInt_FromLong(key_1); 84 | PyObject * key_2_o = PyInt_FromLong(key_2); 85 | PyObject * key_3_o = PyString_FromString(key_3); 86 | 87 | PyObject * value_1_o = PyString_FromString(value_1); 88 | PyObject * value_2_o = PyString_FromString(value_2); 89 | PyObject * value_3_o = PyString_FromString(value_3); 90 | 91 | if (PyDict_SetItem(my_dict, key_1_o, value_1_o) == -1) { 92 | return NULL; 93 | } 94 | if (PyDict_SetItem(my_dict, key_2_o, value_2_o) == -1) { 95 | return NULL; 96 | } 97 | if (PyDict_SetItem(my_dict, key_3_o, value_3_o) == -1) { 98 | return NULL; 99 | } 100 | 101 | return my_dict; 102 | } 103 | 104 | static PyObject * hello_wrapper(PyObject * self, PyObject * args) 105 | { 106 | char * input = NULL; 107 | char * result; 108 | PyObject * ret; 109 | 110 | if (!PyArg_ParseTuple(args, "|s", &input)) { 111 | return NULL; 112 | } 113 | 114 | if (input == NULL) { 115 | Py_INCREF(Py_None); 116 | return Py_None; 117 | } 118 | 119 | result = hello(input); 120 | 121 | ret = PyString_FromString(result); 122 | free(result); 123 | 124 | return ret; 125 | } 126 | 127 | static PyMethodDef HelloMethods[] 
= { 128 | { "hello", hello_wrapper, METH_VARARGS, "Say hello" }, 129 | { "complex", complex_return_type, METH_VARARGS, "Return a complex data type" }, 130 | { "complex2", complex_return_type2, METH_VARARGS, "Return a complex data type" }, 131 | { "cobj_in", cobj_in, METH_VARARGS, "" }, 132 | { "cobj_out", cobj_out, METH_VARARGS, "" }, 133 | { NULL, NULL, 0, NULL } 134 | }; 135 | 136 | DL_EXPORT(void) inithello(void) 137 | { 138 | Py_InitModule("hello", HelloMethods); 139 | } 140 | -------------------------------------------------------------------------------- /publish/code/hello/run.py: -------------------------------------------------------------------------------- 1 | import hello 2 | print hello.hello('world!') 3 | assert hello.hello('world!') == 'hello, world!' 4 | -------------------------------------------------------------------------------- /publish/code/hello/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | # the C extension module 4 | extension_mod = Extension("hello", ["hellomodule.c", "hello.c"]) 5 | 6 | setup(name = "hello", ext_modules=[extension_mod]) 7 | -------------------------------------------------------------------------------- /publish/code/parallelpython/calc_primes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys, time 3 | import pp 4 | 5 | def isprime(n): 6 | """Returns True if n is prime and False otherwise""" 7 | import math 8 | 9 | if n < 2: 10 | return False 11 | if n == 2: 12 | return True 13 | max = int(math.ceil(math.sqrt(n))) 14 | i = 2 15 | while i <= max: 16 | if n % i == 0: 17 | return False 18 | i += 1 19 | return True 20 | 21 | def sum_primes(n): 22 | """Calculates sum of all primes below given integer n""" 23 | return sum([x for x in xrange(2, n) if isprime(x)]) 24 | 25 | #### 26 | 27 | # Creates jobserver with specified number of workers 28 | job_server
= pp.Server(ncpus=int(sys.argv[1])) 29 | 30 | print "Starting pp with", job_server.get_ncpus(), "workers" 31 | 32 | start_time = time.time() 33 | 34 | # Submit a job of calculating sum_primes(input) for execution. 35 | # 36 | # * sum_primes - the function 37 | # * (input,) - tuple with arguments for sum_primes 38 | # * (isprime,) - tuple with functions on which function sum_primes depends 39 | # 40 | # Execution starts as soon as one of the workers becomes available 41 | 42 | inputs = (100000, 100100, 100200, 100300, 100400, 100500, 100600, 100700) 43 | 44 | jobs = [] 45 | for input in inputs: 46 | job = job_server.submit(sum_primes, (input,), (isprime,)) 47 | jobs.append(job) 48 | 49 | for job, input in zip(jobs, inputs): 50 | print "Sum of primes below", input, "is", job() 51 | 52 | print "Time elapsed: ", time.time() - start_time, "s" 53 | job_server.print_stats() 54 | -------------------------------------------------------------------------------- /publish/code/profiling/count.py: -------------------------------------------------------------------------------- 1 | try: 2 | import psyco 3 | psyco.full() 4 | except ImportError: 5 | pass 6 | 7 | def count3(limit=5): 8 | l = [] 9 | for i in range(limit): 10 | l.append(i) 11 | 12 | def count1(limit=50000): 13 | x = 0 14 | for i in range(0, limit): 15 | count3() 16 | x += i 17 | 18 | return x 19 | 20 | def count2(limit=150000): 21 | x = 0 22 | for i in range(0, limit): 23 | count3(10) 24 | x += i 25 | 26 | return x 27 | -------------------------------------------------------------------------------- /publish/code/profiling/hotshot.prof: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ctb/advanced-swc/af81ac874f6a714a27bc378a1e919695ac5c0da3/publish/code/profiling/hotshot.prof -------------------------------------------------------------------------------- /publish/code/profiling/profile1.py: 
-------------------------------------------------------------------------------- 1 | def count1(limit=500000): 2 | x = 0 3 | for i in range(0, limit): 4 | x += i 5 | 6 | return x 7 | 8 | def count2(limit=1500000): 9 | x = 0 10 | for i in range(0, limit): 11 | x += i 12 | 13 | return x 14 | 15 | if __name__ == '__main__': 16 | print '' 17 | print '*** STATPROF output ***' 18 | print '' 19 | 20 | import statprof 21 | statprof.start() 22 | count1() 23 | count2() 24 | statprof.stop() 25 | statprof.display() 26 | 27 | print '' 28 | print '*** HOTSHOT output *** ' 29 | print '' 30 | 31 | import hotshot, hotshot.stats 32 | prof = hotshot.Profile('profile1.prof') 33 | prof.runcall(count1) 34 | prof.runcall(count2) 35 | prof.close() 36 | 37 | stats = hotshot.stats.load('profile1.prof') 38 | stats.sort_stats('time', 'calls') 39 | stats.print_stats(20) 40 | -------------------------------------------------------------------------------- /publish/code/profiling/run-cprofile: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | from count import * 3 | 4 | print '' 5 | print '*** cProfile output *** ' 6 | print '' 7 | 8 | def runboth(): 9 | count1() 10 | count2() 11 | 12 | import cProfile, pstats 13 | cProfile.run('runboth()', 'cprof.out') 14 | 15 | p = pstats.Stats('cprof.out') 16 | p.sort_stats('time').print_stats(10) 17 | -------------------------------------------------------------------------------- /publish/code/profiling/run-hotshot: -------------------------------------------------------------------------------- 1 | #!
/usr/bin/env python2.5 2 | from count import * 3 | 4 | print '' 5 | print '*** HOTSHOT output *** ' 6 | print '' 7 | 8 | import hotshot, hotshot.stats 9 | prof = hotshot.Profile('hotshot.prof') 10 | prof.runcall(count1) 11 | prof.runcall(count2) 12 | prof.close() 13 | 14 | stats = hotshot.stats.load('hotshot.prof') 15 | stats.sort_stats('time', 'calls') 16 | stats.print_stats(20) 17 | -------------------------------------------------------------------------------- /publish/code/profiling/run-statprof: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python2.5 2 | from count import * 3 | 4 | print '' 5 | print '*** STATPROF output ***' 6 | print '' 7 | 8 | import statprof 9 | statprof.start() 10 | count1() 11 | count2() 12 | statprof.stop() 13 | statprof.display() 14 | -------------------------------------------------------------------------------- /publish/code/profiling/run-timeit: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | from timeit import Timer 3 | from count import * 4 | 5 | t1 = Timer("count1()", "from count import count1") 6 | print 'count1:', t1.timeit(number=1) 7 | 8 | t2 = Timer("count2()", "from count import count2") 9 | print 'count2:', t2.timeit(number=1) 10 | -------------------------------------------------------------------------------- /publish/code/profiling/statprof.py: -------------------------------------------------------------------------------- 1 | ## statprof.py 2 | ## Copyright (C) 2004,2005 Andy Wingo 3 | ## Copyright (C) 2001 Rob Browning 4 | 5 | ## This library is free software; you can redistribute it and/or 6 | ## modify it under the terms of the GNU Lesser General Public 7 | ## License as published by the Free Software Foundation; either 8 | ## version 2.1 of the License, or (at your option) any later version. 
9 | ## 10 | ## This library is distributed in the hope that it will be useful, 11 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | ## Lesser General Public License for more details. 14 | ## 15 | ## You should have received a copy of the GNU Lesser General Public 16 | ## License along with this program; if not, contact: 17 | ## 18 | ## Free Software Foundation Voice: +1-617-542-5942 19 | ## 59 Temple Place - Suite 330 Fax: +1-617-542-2652 20 | ## Boston, MA 02111-1307, USA gnu@gnu.org 21 | 22 | """ 23 | statprof is intended to be a fairly simple statistical profiler for 24 | python. It was ported directly from a statistical profiler for guile, 25 | also named statprof, available from guile-lib [0]. 26 | 27 | [0] http://wingolog.org/software/guile-lib/statprof/ 28 | 29 | To start profiling, call statprof.start(): 30 | >>> start() 31 | 32 | Then run whatever it is that you want to profile, for example: 33 | >>> import test.pystone; test.pystone.pystones() 34 | 35 | Then stop the profiling and print out the results: 36 | >>> stop() 37 | >>> display() 38 | % cumulative self 39 | time seconds seconds name 40 | 26.72 1.40 0.37 pystone.py:79:Proc0 41 | 13.79 0.56 0.19 pystone.py:133:Proc1 42 | 13.79 0.19 0.19 pystone.py:208:Proc8 43 | 10.34 0.16 0.14 pystone.py:229:Func2 44 | 6.90 0.10 0.10 pystone.py:45:__init__ 45 | 4.31 0.16 0.06 pystone.py:53:copy 46 | ... 47 | 48 | All of the numerical data with the exception of the calls column is 49 | statistically approximate. In the following column descriptions, and 50 | in all of statprof, "time" refers to execution time (both user and 51 | system), not wall clock time. 52 | 53 | % time 54 | The percent of the time spent inside the procedure itself (not 55 | counting children). 56 | 57 | cumulative seconds 58 | The total number of seconds spent in the procedure, including 59 | children. 
60 | 61 | self seconds 62 | The total number of seconds spent in the procedure itself (not 63 | counting children). 64 | 65 | name 66 | The name of the procedure. 67 | 68 | By default statprof keeps the data collected from previous runs. If you 69 | want to clear the collected data, call reset(): 70 | >>> reset() 71 | 72 | reset() can also be used to change the sampling frequency. For example, 73 | to tell statprof to sample 50 times a second: 74 | >>> reset(50) 75 | 76 | This means that statprof will sample the call stack after every 1/50 of 77 | a second of user + system time spent running on behalf of the python 78 | process. When your process is idle (for example, blocking in a read(), 79 | as is the case at the listener), the clock does not advance. For this 80 | reason statprof is not currently suitable for profiling io-bound 81 | operations. 82 | 83 | The profiler uses the hash of the code object itself to identify the 84 | procedures, so it won't confuse different procedures with the same name. 85 | They will show up as two different rows in the output. 86 | 87 | Right now the profiler is quite simplistic. It cannot provide 88 | call-graphs or other higher level information. What you see in the 89 | table is pretty much all there is. Patches are welcome :-) 90 | 91 | 92 | Threading 93 | --------- 94 | 95 | Because signals only get delivered to the main thread in Python, 96 | statprof only profiles the main thread. However, because the time 97 | reporting function uses per-process timers, the results can be 98 | significantly off if other threads' work patterns are not similar to the 99 | main thread's work patterns. 100 | 101 | 102 | Implementation notes 103 | -------------------- 104 | 105 | The profiler works by setting the unix profiling signal ITIMER_PROF to 106 | go off after the interval you define in the call to reset().
When the 107 | signal fires, a sampling routine is run which looks at the current 108 | procedure that's executing, and then crawls up the stack, and for each 109 | frame encountered, increments that frame's code object's sample count. 110 | Note that if a procedure is encountered multiple times on a given stack, 111 | it is only counted once. After the sampling is complete, the profiler 112 | resets the profiling timer to fire again after the appropriate interval. 113 | 114 | Meanwhile, the profiler keeps track, via os.times(), of how much CPU time 115 | (system and user -- which is also what ITIMER_PROF tracks) has elapsed 116 | while code has been executing within a start()/stop() block. 117 | 118 | The profiler also tries to avoid counting or timing its own code as 119 | much as possible. 120 | """ 121 | 122 | 123 | from __future__ import division 124 | 125 | try: 126 | import itimer 127 | except ImportError: 128 | raise ImportError('''statprof requires the itimer python extension. 129 | To install it, enter the following commands from a terminal: 130 | 131 | wget http://www.cute.fi/~torppa/py-itimer/py-itimer.tar.gz 132 | tar zxvf py-itimer.tar.gz 133 | cd py-itimer 134 | sudo python setup.py install 135 | ''') 136 | 137 | import signal 138 | import os 139 | 140 | 141 | __all__ = ['start', 'stop', 'reset', 'display'] 142 | 143 | 144 | ########################################################################### 145 | ## Utils 146 | 147 | def clock(): 148 | times = os.times() 149 | return times[0] + times[1] 150 | 151 | 152 | ########################################################################### 153 | ## Collection data structures 154 | 155 | class ProfileState(object): 156 | def __init__(self, frequency=None): 157 | self.reset(frequency) 158 | 159 | def reset(self, frequency=None): 160 | # total so far 161 | self.accumulated_time = 0.0 162 | # start_time when timer is active 163 | self.last_start_time = None 164 | # total count of sampler calls 165 | 
self.sample_count = 0 166 | # a float 167 | if frequency: 168 | self.sample_interval = 1.0/frequency 169 | elif not hasattr(self, 'sample_interval'): 170 | # default to 100 Hz 171 | self.sample_interval = 1.0/100.0 172 | else: 173 | # leave the frequency as it was 174 | pass 175 | self.remaining_prof_time = None 176 | # for user start/stop nesting 177 | self.profile_level = 0 178 | # whether to catch apply-frame 179 | self.count_calls = False 180 | # gc time between start() and stop() 181 | self.gc_time_taken = 0 182 | 183 | def accumulate_time(self, stop_time): 184 | self.accumulated_time += stop_time - self.last_start_time 185 | 186 | state = ProfileState() 187 | 188 | ## call_data := { code object: CallData } 189 | call_data = {} 190 | class CallData(object): 191 | def __init__(self, code): 192 | self.name = code.co_name 193 | self.filename = code.co_filename 194 | self.lineno = code.co_firstlineno 195 | self.call_count = 0 196 | self.cum_sample_count = 0 197 | self.self_sample_count = 0 198 | call_data[code] = self 199 | 200 | def get_call_data(code): 201 | return call_data.get(code, None) or CallData(code) 202 | 203 | 204 | ########################################################################### 205 | ## SIGPROF handler 206 | 207 | def sample_stack_procs(frame): 208 | state.sample_count += 1 209 | get_call_data(frame.f_code).self_sample_count += 1 210 | 211 | code_seen = {} 212 | while frame: 213 | code_seen[frame.f_code] = True 214 | frame = frame.f_back 215 | for code in code_seen.iterkeys(): 216 | get_call_data(code).cum_sample_count += 1 217 | 218 | def profile_signal_handler(signum, frame): 219 | if state.profile_level > 0: 220 | state.accumulate_time(clock()) 221 | sample_stack_procs(frame) 222 | itimer.setitimer(itimer.ITIMER_PROF, 223 | state.sample_interval, 0.0) 224 | state.last_start_time = clock() 225 | 226 | 227 | ########################################################################### 228 | ## Profiling API 229 | 230 | def is_active(): 231 
| return state.profile_level > 0 232 | 233 | def start(): 234 | state.profile_level += 1 235 | if state.profile_level == 1: 236 | state.last_start_time = clock() 237 | rpt = state.remaining_prof_time 238 | state.remaining_prof_time = None 239 | signal.signal(signal.SIGPROF, profile_signal_handler) 240 | itimer.setitimer(itimer.ITIMER_PROF, 241 | rpt or state.sample_interval, 0.0) 242 | state.gc_time_taken = 0 # dunno 243 | 244 | def stop(): 245 | state.profile_level -= 1 246 | if state.profile_level == 0: 247 | state.accumulate_time(clock()) 248 | state.last_start_time = None 249 | rpt = itimer.setitimer(itimer.ITIMER_PROF, 0.0, 0.0) 250 | signal.signal(signal.SIGPROF, signal.SIG_IGN) 251 | state.remaining_prof_time = rpt[0] 252 | state.gc_time_taken = 0 # dunno 253 | 254 | def reset(frequency=None): 255 | assert state.profile_level == 0, "Can't reset() while statprof is running" 256 | call_data.clear() 257 | state.reset(frequency) 258 | 259 | 260 | ########################################################################### 261 | ## Reporting API 262 | 263 | class CallStats(object): 264 | def __init__(self, call_data): 265 | self_samples = call_data.self_sample_count 266 | cum_samples = call_data.cum_sample_count 267 | nsamples = state.sample_count 268 | secs_per_sample = state.accumulated_time / nsamples 269 | basename = os.path.basename(call_data.filename) 270 | 271 | self.name = '%s:%d:%s' % (basename, call_data.lineno, call_data.name) 272 | self.pcnt_time_in_proc = self_samples / nsamples * 100 273 | self.cum_secs_in_proc = cum_samples * secs_per_sample 274 | self.self_secs_in_proc = self_samples * secs_per_sample 275 | self.num_calls = None 276 | self.self_secs_per_call = None 277 | self.cum_secs_per_call = None 278 | 279 | def display(self, OUT=None, percent_threshold=None): 280 | if percent_threshold is not None: 281 | if self.pcnt_time_in_proc <= percent_threshold: 282 | return 283 | 284 | print>>OUT, '%6.2f %9.2f %9.2f %s' % (self.pcnt_time_in_proc, 285 | 
self.cum_secs_in_proc, 286 | self.self_secs_in_proc, 287 | self.name) 288 | 289 | def clean_coverage_count(OUT=None): 290 | global call_data 291 | l = call_data.items() 292 | d = {} 293 | for k, v in l: 294 | if os.path.basename(v.filename) == 'coverage.py': 295 | print>>OUT, '** REMOVING coverage.py' 296 | state.sample_count -= v.self_sample_count 297 | continue 298 | d[k] = v 299 | 300 | call_data = d 301 | 302 | def display(OUT=None, percent_threshold=None): 303 | if state.sample_count == 0: 304 | print>>OUT, 'No samples recorded.' 305 | return 306 | 307 | l = [CallStats(x) for x in call_data.itervalues()] 308 | l = [(x.self_secs_in_proc, x.cum_secs_in_proc, x) for x in l] 309 | l.sort() 310 | l.reverse() 311 | l = [x[2] for x in l] 312 | 313 | print>>OUT, '%5.5s %10.10s %7.7s %-8.8s' % ('% ', 'cumulative', 'self', '') 314 | print>>OUT, '%5.5s %9.9s %8.8s %-8.8s' % ("time", "seconds", "seconds", "name") 315 | 316 | for x in l: 317 | x.display(OUT, percent_threshold=percent_threshold) 318 | 319 | print>>OUT, '---' 320 | print>>OUT, 'Sample count: %d' % state.sample_count 321 | print>>OUT, 'Total time: %f seconds' % state.accumulated_time 322 | -------------------------------------------------------------------------------- /publish/code/psyco/psyco-test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | import sys 3 | 4 | def divides(primes, n): 5 | for trial in primes: 6 | if n % trial == 0: return True 7 | return False 8 | 9 | def prime_sieve(): 10 | p, current = [], 1 11 | while 1: 12 | current += 1 13 | if not divides(p, current): # if any previous primes divide, cancel 14 | p.append(current) # this is prime! 
save & return 15 | yield current 16 | 17 | if int(sys.argv[1]): 18 | import psyco 19 | psyco.full() 20 | 21 | import time 22 | t0 = time.time() 23 | for prime in prime_sieve(): 24 | if prime > 100000: 25 | break 26 | t1 = time.time() 27 | 28 | print t1 - t0 29 | -------------------------------------------------------------------------------- /publish/code/pyrex-hello/hello.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | 5 | char * hello(char * name) 6 | { 7 | char * buf; 8 | buf = (char *) malloc(strlen("hello, ") + strlen(name) + 1); 9 | 10 | sprintf(buf, "hello, %s", name); 11 | return buf; 12 | } 13 | -------------------------------------------------------------------------------- /publish/code/pyrex-hello/hello.h: -------------------------------------------------------------------------------- 1 | char * hello(char * name); 2 | -------------------------------------------------------------------------------- /publish/code/pyrex-hello/hellomodule.pyx: -------------------------------------------------------------------------------- 1 | cdef extern from "hello.h": 2 | char * hello(char *s) 3 | 4 | def hello_fn(s=None): 5 | return hello(s) 6 | -------------------------------------------------------------------------------- /publish/code/pyrex-hello/run.py: -------------------------------------------------------------------------------- 1 | import hellomodule 2 | assert hellomodule.hello_fn("world") == "hello, world" 3 | print hellomodule.hello_fn("world") 4 | -------------------------------------------------------------------------------- /publish/code/pyrex-hello/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Pyrex.Distutils import build_ext 4 | 5 | setup( 6 | name = "hello", 7 | ext_modules=[ Extension("hellomodule", ["hellomodule.pyx", "hello.c"]) ], 8
| cmdclass = {'build_ext': build_ext} 9 | ) 10 | -------------------------------------------------------------------------------- /publish/code/pyrex-primes/primes.pyx: -------------------------------------------------------------------------------- 1 | # 2 | # Calculate prime numbers 3 | # 4 | 5 | def primes(int maxprime): 6 | cdef int n, k, i 7 | cdef int p[100000] 8 | result = [] 9 | k = 0 10 | n = 2 11 | while n < maxprime: 12 | i = 0 13 | 14 | # test against previous primes 15 | while i < k and n % p[i] <> 0: 16 | i = i + 1 17 | 18 | # prime? if so, save. 19 | if i == k: 20 | p[k] = n 21 | k = k + 1 22 | result.append(n) 23 | n = n + 1 24 | 25 | return result 26 | -------------------------------------------------------------------------------- /publish/code/pyrex-primes/run-primes.py: -------------------------------------------------------------------------------- 1 | import primes 2 | l = primes.primes(100000) 3 | assert l[-5:] == [99929, 99961, 99971, 99989, 99991] 4 | print l[-5:] 5 | -------------------------------------------------------------------------------- /publish/code/pyrex-primes/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Pyrex.Distutils import build_ext 4 | 5 | setup( 6 | name = "primes", 7 | ext_modules=[ 8 | Extension("primes", ["primes.pyx"], libraries = []) 9 | ], 10 | cmdclass = {'build_ext': build_ext} 11 | ) 12 | -------------------------------------------------------------------------------- /publish/code/rpy/do-pca.py: -------------------------------------------------------------------------------- 1 | from rpy import * 2 | 3 | def plot_pca(filename): 4 | r("""data <- read.delim('%s', header=FALSE, sep=" ", nrows=5000)""" \ 5 | % (filename,)) 6 | 7 | r("""pca <- prcomp(data, scale=FALSE, center=FALSE)""") 8 | r("""pairs(pca$x[,1:3], pch=20)""") 9 | 10 | plot_pca('vectors.txt') 11 | 
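The do-pca.py script above delegates all of the numerics to R's prcomp via rpy. For readers without R installed, here is a rough pure-Python sketch of the core computation prcomp performs on this data: build the covariance matrix of the columns, then extract the leading principal direction by power iteration. The function names (covariance_matrix, leading_component) are illustrative only and are not part of the course code or of rpy.

```python
# Pure-Python sketch of the first step of a PCA (what R's prcomp computes);
# illustrative only, stdlib-only, no claim to match prcomp's exact output.

def covariance_matrix(rows):
    """Covariance of the columns of 'rows' (a list of equal-length tuples)."""
    n = len(rows)
    dim = len(rows[0])
    # column means
    means = [sum(row[j] for row in rows) / float(n) for j in range(dim)]
    # accumulate the (dim x dim) sample covariance matrix
    cov = [[0.0] * dim for _ in range(dim)]
    for row in rows:
        d = [row[j] - means[j] for j in range(dim)]
        for i in range(dim):
            for j in range(dim):
                cov[i][j] += d[i] * d[j] / (n - 1)
    return cov

def leading_component(cov, n_iter=200):
    """First principal direction of 'cov', found by power iteration."""
    dim = len(cov)
    v = [1.0] * dim
    for _ in range(n_iter):
        # multiply v by the covariance matrix, then renormalize
        w = [sum(cov[i][j] * v[j] for j in range(dim)) for i in range(dim)]
        norm = sum(x * x for x in w) ** 0.5
        v = [x / norm for x in w]
    return v
```

Applied to vectors.txt (produced by gen-vectors.py below), the leading direction should come out dominated by the first column, which has the largest variance -- consistent with what the prcomp pairs plot shows.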
-------------------------------------------------------------------------------- /publish/code/rpy/gen-vectors.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | for i in range(0, 5000): 4 | print random.randint(0, 10), random.randint(5,12), random.randint(1,2) 5 | -------------------------------------------------------------------------------- /publish/code/run-all.sh: -------------------------------------------------------------------------------- 1 | cd ctypes/ && (make && LD_LIBRARY_PATH=. python hello_mod.py) || echo ERROR 2 | cd ../ 3 | 4 | cd hello/ && python setup.py build_ext --inplace || echo ERROR 5 | cd ../ 6 | 7 | cd parallelpython && python calc_primes.py 2 || echo ERROR 8 | cd ../ 9 | 10 | cd profiling && (python run-cprofile && python run-hotshot && \ 11 | python run-statprof && python run-timeit) || echo ERROR 12 | cd ../ 13 | 14 | cd psyco && python psyco-test.py 1 || echo ERROR 15 | cd ../ 16 | 17 | cd pyrex-hello/ && (python setup.py build_ext --inplace && python run.py) \ 18 | || echo ERROR 19 | cd ../ 20 | 21 | cd pyrex-primes/ && (python setup.py build_ext --inplace && \ 22 | python run-primes.py) || echo ERROR 23 | cd ../ 24 | 25 | cd rpy/ && python do-pca.py || echo ERROR 26 | cd ../ 27 | 28 | cd sip/ && (bash make-libhello.sh && python configure.py) || echo ERROR 29 | cd ../ 30 | 31 | cd swig-demo/ && make && python run.py || echo ERROR 32 | cd ../ 33 | 34 | cd pyrex-primes/ && (python setup.py build_ext --inplace && \ 35 | python run-primes.py) || echo ERROR 36 | cd ../ 37 | 38 | cd sip/ && (bash ./make-libhello.sh && python configure.py && make) \ 39 | || echo ERROR 40 | cd ../ 41 | 42 | cd swig-demo/ && (make && python run.py) || echo ERROR 43 | cd ../ 44 | -------------------------------------------------------------------------------- /publish/code/sip/Makefile: -------------------------------------------------------------------------------- 1 | TARGET = hellomodule.so 2 | 
OFILES = siphellomodulecmodule.o 3 | HFILES = sipAPIhellomodule.h 4 | 5 | CC = cc 6 | CXX = c++ 7 | LINK = c++ 8 | -------------------------------------------------------------------------------- /publish/code/sip/configure.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sipconfig 3 | 4 | # The name of the SIP build file generated by SIP and used by the build 5 | # system. 6 | build_file = "hello.sbf" 7 | 8 | # Get the SIP configuration information. 9 | config = sipconfig.Configuration() 10 | 11 | # Run SIP to generate the code. 12 | os.system(" ".join([config.sip_bin, "-c", ".", "-b", build_file, "hello.sip"])) 13 | 14 | # Create the Makefile. 15 | makefile = sipconfig.SIPModuleMakefile(config, build_file) 16 | 17 | # Add the library we are wrapping. The name doesn't include any platform 18 | # specific prefixes or extensions (e.g. the "lib" prefix on UNIX, or the 19 | # ".dll" extension on Windows). 20 | makefile.extra_libs = ["hello"] 21 | makefile.extra_lib_dirs = ["."] 22 | 23 | # Generate the Makefile itself. 
24 | makefile.generate() 25 | -------------------------------------------------------------------------------- /publish/code/sip/hello.c: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | 5 | char * hello(char * name) 6 | { 7 | char * buf; 8 | buf = (char *) malloc(strlen("hello, ") + strlen(name) + 1); 9 | 10 | printf("in hello\n"); 11 | 12 | sprintf(buf, "hello, %s", name); 13 | return buf; 14 | } 15 | -------------------------------------------------------------------------------- /publish/code/sip/hello.h: -------------------------------------------------------------------------------- 1 | char * hello(char * name); 2 | -------------------------------------------------------------------------------- /publish/code/sip/hello.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ctb/advanced-swc/af81ac874f6a714a27bc378a1e919695ac5c0da3/publish/code/sip/hello.o -------------------------------------------------------------------------------- /publish/code/sip/hello.sip: -------------------------------------------------------------------------------- 1 | %CModule hellomodule 0 2 | 3 | char * hello(char *); 4 | -------------------------------------------------------------------------------- /publish/code/sip/make-libhello.sh: -------------------------------------------------------------------------------- 1 | #!
/bin/bash 2 | gcc -c hello.c 3 | ar rv libhello.a hello.o 4 | -------------------------------------------------------------------------------- /publish/code/sip/run.py: -------------------------------------------------------------------------------- 1 | import hellomodule 2 | assert hellomodule.hello('world') == 'hello, world' 3 | print hellomodule.hello('world') 4 | -------------------------------------------------------------------------------- /publish/code/swig-demo/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | swig -python -c++ -o _swigdemo_module.cc swigdemo.i 3 | python setup.py build_ext --inplace 4 | -------------------------------------------------------------------------------- /publish/code/swig-demo/run.py: -------------------------------------------------------------------------------- 1 | import swigdemo 2 | assert swigdemo.hello("world") == "hello, world" 3 | print swigdemo.hello("world") 4 | -------------------------------------------------------------------------------- /publish/code/swig-demo/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | 3 | # the c++ extension module
4 | extension_mod = Extension("_swigdemo", 5 | ["_swigdemo_module.cc", "swigdemo.cc"]) 6 | 7 | setup(name = "swigdemo", version="blah", ext_modules=[extension_mod]) 8 | -------------------------------------------------------------------------------- /publish/code/swig-demo/swigdemo.cc: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | 5 | char * hello(char * name) 6 | { 7 | char * buf; 8 | buf = (char *) malloc(strlen("hello, ") + strlen(name) + 1); 9 | 10 | sprintf(buf, "hello, %s", name); 11 | return buf; 12 | } 13 | -------------------------------------------------------------------------------- /publish/code/swig-demo/swigdemo.hh: -------------------------------------------------------------------------------- 1 | char * hello(char * name); 2 | -------------------------------------------------------------------------------- /publish/code/swig-demo/swigdemo.i: -------------------------------------------------------------------------------- 1 | %module swigdemo 2 | 3 | %{ 4 | #include <stdlib.h> 5 | #include "swigdemo.hh" 6 | %} 7 | 8 | %include "swigdemo.hh" 9 | -------------------------------------------------------------------------------- /publish/code/swig-demo/swigdemo.py: -------------------------------------------------------------------------------- 1 | # This file was automatically generated by SWIG (http://www.swig.org). 2 | # Version 1.3.31 3 | # 4 | # Don't modify this file, modify the SWIG interface instead. 5 | # This file is compatible with both classic and new-style classes. 6 | 7 | import _swigdemo 8 | import new 9 | new_instancemethod = new.instancemethod 10 | try: 11 | _swig_property = property 12 | except NameError: 13 | pass # Python < 2.2 doesn't have 'property'.
14 | def _swig_setattr_nondynamic(self,class_type,name,value,static=1): 15 | if (name == "thisown"): return self.this.own(value) 16 | if (name == "this"): 17 | if type(value).__name__ == 'PySwigObject': 18 | self.__dict__[name] = value 19 | return 20 | method = class_type.__swig_setmethods__.get(name,None) 21 | if method: return method(self,value) 22 | if (not static) or hasattr(self,name): 23 | self.__dict__[name] = value 24 | else: 25 | raise AttributeError("You cannot add attributes to %s" % self) 26 | 27 | def _swig_setattr(self,class_type,name,value): 28 | return _swig_setattr_nondynamic(self,class_type,name,value,0) 29 | 30 | def _swig_getattr(self,class_type,name): 31 | if (name == "thisown"): return self.this.own() 32 | method = class_type.__swig_getmethods__.get(name,None) 33 | if method: return method(self) 34 | raise AttributeError,name 35 | 36 | def _swig_repr(self): 37 | try: strthis = "proxy of " + self.this.__repr__() 38 | except: strthis = "" 39 | return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) 40 | 41 | import types 42 | try: 43 | _object = types.ObjectType 44 | _newclass = 1 45 | except AttributeError: 46 | class _object : pass 47 | _newclass = 0 48 | del types 49 | 50 | 51 | hello = _swigdemo.hello 52 | 53 | 54 | -------------------------------------------------------------------------------- /publish/combine.py: -------------------------------------------------------------------------------- 1 | import sys 2 | print '' 3 | 4 | for i in sys.argv[1:]: 5 | print open(i).read() 6 | print '' 7 | -------------------------------------------------------------------------------- /publish/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Intermediate and Advanced Software Carpentry documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Jul 3 21:50:34 2012. 
5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.doctest'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.txt' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'Intermediate and Advanced Software Carpentry' 44 | copyright = u'2012, C. Titus Brown' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '1.0' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '1.0' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 
57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 
109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'IntermediateandAdvancedSoftwareCarpentrydoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'IntermediateandAdvancedSoftwareCarpentry.tex', u'Intermediate and Advanced Software Carpentry Documentation', 187 | u'C. Titus Brown', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 
208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'intermediateandadvancedsoftwarecarpentry', u'Intermediate and Advanced Software Carpentry Documentation', 217 | [u'C. Titus Brown'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'IntermediateandAdvancedSoftwareCarpentry', u'Intermediate and Advanced Software Carpentry Documentation', 231 | u'C. Titus Brown', 'IntermediateandAdvancedSoftwareCarpentry', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /publish/data/commented-data.txt: -------------------------------------------------------------------------------- 1 | # this is a comment or a header 2 | 1 3 | # another comment 4 | 2 5 | -------------------------------------------------------------------------------- /publish/data/keyvalue.txt: -------------------------------------------------------------------------------- 1 | a 5 2 | b 6 3 | d 7 4 | a 2 5 | c 1 6 | -------------------------------------------------------------------------------- /publish/data/listfile.txt: -------------------------------------------------------------------------------- 1 | a 2 | b 3 | c 4 | d 5 | -------------------------------------------------------------------------------- /publish/day1.txt: -------------------------------------------------------------------------------- 1 | Day 1 2 | ----- 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | idiomatic-python 10 | structuring-python 11 | testing-python 12 | nose-intro 13 | -------------------------------------------------------------------------------- /publish/day2.txt: -------------------------------------------------------------------------------- 1 | Day 2 2 | ----- 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | idiomatic-python-2 10 | increasing-performance 11 | tools 12 | pyparsing-presentation 13 | online-resources 14 | -------------------------------------------------------------------------------- /publish/day3.txt: -------------------------------------------------------------------------------- 1 | Day 3 2 | ----- 3 | 4 | Contents: 5 | 6 | .. 
toctree:: 7 | :maxdepth: 2 8 | 9 | c++-wrapping 10 | multiprocessing 11 | useful-packages 12 | new-style-classes 13 | gui-gossip 14 | python-30 15 | -------------------------------------------------------------------------------- /publish/gui-gossip.txt: -------------------------------------------------------------------------------- 1 | GUI Gossip 2 | ========== 3 | 4 | Tkinter 5 | 6 | - fairly primitive; 7 | - but: comes with every Python install! 8 | - still a bit immature (feb 2007) for Mac OS X native ("Aqua"); X11 version 9 | works fine on OS X. 10 | 11 | 12 | PyQT (http://www.riverbankcomputing.com/software/pyqt/intro) 13 | 14 | - mature; 15 | - cross platform; 16 | - freely available for Open Source Software use; 17 | - has a testing framework! 18 | 19 | KWWidgets (http://www.kwwidgets.org/) 20 | 21 | - immature; based on Tk, so Mac OS X native is still a bit weak; 22 | - lightweight; 23 | - attractive; 24 | - has a testing framework! 25 | 26 | pyFLTK (http://sf.net/projects/pyfltk/) 27 | 28 | - cross platform; 29 | - FLTK is mature, although primitive; 30 | - not very pretty; 31 | - very lightweight; 32 | 33 | wxWindows (http://www.wxwindows.org/) 34 | 35 | - cross platform; 36 | - mature?; looks good. 37 | - no personal or "friend" experience; 38 | - try reading http://www.ibm.com/developerworks/library/l-wxwin.html 39 | 40 | pyGTK (http://www.pygtk.org/) 41 | 42 | - cross platform; 43 | - mature; looks good. 44 | - no personal or "friend" experience; 45 | - UI designer; 46 | 47 | Mild recommendation: start with Qt, which is apparently very mature 48 | and very powerful. 49 | -------------------------------------------------------------------------------- /publish/header.txt: -------------------------------------------------------------------------------- 1 | .. @CTB 2 | binary eggs? 
3 | multiproc code coverage 4 | 5 | ====================================================== 6 | Intermediate and Advanced Software Carpentry in Python 7 | ====================================================== 8 | 9 | :Author: C Titus Brown 10 | :Date: June 18, 2007 11 | 12 | Welcome! You have stumbled upon the class handouts for a course I 13 | taught at Lawrence Livermore National Lab, June 12-June 14, 2007. 14 | 15 | These notes are intended to *accompany* my lecture, which was a 16 | demonstration of a variety of "intermediate" Python features and 17 | packages. Because the demonstration was interactive, these notes are 18 | not complete notes of what went on in the course. (Sorry about that; 19 | they *have* been updated from my actual handouts to be more 20 | complete...) 21 | 22 | However, all 70 pages are free to view and print, so enjoy. 23 | 24 | All errors are, of course, my own. Note that almost all of the 25 | examples starting with '>>>' are doctests, so you can take `the source 26 | `__ and run doctest on it to make sure I'm being honest. 27 | But do me a favor and run the doctests with Python 2.5 ;). 28 | 29 | Note that Day 1 of the course ran through the end of "Testing Your 30 | Software"; Day 2 ran through the end of "Online Resources for Python"; 31 | and Day 3 finished it off. 32 | 33 | Example code (mostly from the C extension sections) is available `here 34 | `__; see the `README `__ for more information. 35 | 36 | .. Contents:: 37 | -------------------------------------------------------------------------------- /publish/idiomatic-python-2.txt: -------------------------------------------------------------------------------- 1 | Idiomatic Python revisited 2 | ========================== 3 | 4 | sets 5 | ---- 6 | 7 | Sets migrated in Python 2.4 from the stdlib ``sets`` module into a built-in type. 8 | They're exactly what you think: unordered collections of values. 
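Since a set holds each value only once, building a set from a sequence is also a quick way to deduplicate it. A small sketch (written with ``print()`` calls, so it runs under both Python 2 and 3):

```python
# Duplicates collapse when a sequence is converted to a set.
values = [1, 2, 2, 3, 3, 3]
unique = set(values)

# Sets are unordered, so sort before printing for a stable result.
print(sorted(unique))  # prints [1, 2, 3]
```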
9 | 10 | >>> s = set((1, 2, 3, 4, 5)) 11 | >>> t = set((4, 5, 6)) 12 | >>> print s 13 | set([1, 2, 3, 4, 5]) 14 | 15 | You can union and intersect them: 16 | 17 | >>> print s.union(t) 18 | set([1, 2, 3, 4, 5, 6]) 19 | >>> print s.intersection(t) 20 | set([4, 5]) 21 | 22 | And you can also check for supersets and subsets: 23 | 24 | >>> u = set((4, 5, 6, 7)) 25 | >>> print t.issubset(u) 26 | True 27 | >>> print u.issubset(t) 28 | False 29 | 30 | One more note: you can convert between sets and lists pretty easily: 31 | 32 | >>> sl = list(s) 33 | >>> ss = set(sl) 34 | 35 | ``any`` and ``all`` 36 | ------------------- 37 | 38 | ``all`` and ``any`` are two functions, new in Python 2.5, that work with iterables 39 | (e.g. lists, generators, etc.). ``any`` returns True if *any* element of 40 | the iterable is True (and False otherwise); ``all`` returns True if *all* 41 | elements of the iterable are True (and False otherwise). 42 | 43 | Consider: 44 | 45 | >>> x = [ True, False ] 46 | >>> print any(x) 47 | True 48 | >>> print all(x) 49 | False 50 | 51 | >>> y = [ True, True ] 52 | >>> print any(y) 53 | True 54 | >>> print all(y) 55 | True 56 | 57 | >>> z = [ False, False ] 58 | >>> print any(z) 59 | False 60 | >>> print all(z) 61 | False 62 | 63 | Exceptions and exception hierarchies 64 | ------------------------------------ 65 | 66 | You're all familiar with exception handling using try/except: 67 | 68 | >>> x = [1, 2, 3, 4, 5] 69 | >>> x[10] 70 | Traceback (most recent call last): 71 | ... 72 | IndexError: list index out of range 73 | 74 | You can catch all exceptions quite easily: 75 | 76 | >>> try: 77 | ... y = x[10] 78 | ... except: 79 | ... y = None 80 | 81 | but this is considered bad form, because of the potential for over-broad 82 | exception handling: 83 | 84 | >>> try: 85 | ... y = x["10"] 86 | ... except: 87 | ... y = None 88 | 89 | In general, try to catch the exception most specific to your code: 90 | 91 | >>> try: 92 | ... y = x[10] 93 | ... 
except IndexError: 94 | ... y = None 95 | 96 | ...because then you will see the errors you didn't plan for: 97 | 98 | >>> try: 99 | ... y = x["10"] 100 | ... except IndexError: 101 | ... y = None 102 | Traceback (most recent call last): 103 | ... 104 | TypeError: list indices must be integers 105 | 106 | Incidentally, you can re-raise exceptions, potentially after doing 107 | something else: 108 | 109 | >>> try: 110 | ... y = x[10] 111 | ... except IndexError: 112 | ... # do something else here # 113 | ... raise 114 | Traceback (most recent call last): 115 | ... 116 | IndexError: list index out of range 117 | 118 | There are some special exceptions to be aware of. Two that I run into a lot 119 | are SystemExit and KeyboardInterrupt. KeyboardInterrupt is what is raised 120 | when a CTRL-C interrupts Python; you can handle it and exit gracefully if 121 | you like, e.g. 122 | 123 | >>> try: 124 | ... # do_some_long_running_task() 125 | ... pass 126 | ... except KeyboardInterrupt: 127 | ... sys.exit(0) 128 | 129 | which is sometimes nice for things like Web servers (more on that tomorrow). 130 | 131 | SystemExit is also pretty useful. It's actually an exception raised by 132 | ``sys.exit``, i.e. 133 | 134 | >>> import sys 135 | >>> try: 136 | ... sys.exit(0) 137 | ... except SystemExit: 138 | ... pass 139 | 140 | means that sys.exit has no effect! You can also raise SystemExit instead 141 | of calling sys.exit, e.g. 142 | 143 | >>> raise SystemExit(0) 144 | Traceback (most recent call last): 145 | ... 146 | SystemExit: 0 147 | 148 | is equivalent to ``sys.exit(0)``: 149 | 150 | >>> sys.exit(0) 151 | Traceback (most recent call last): 152 | ... 153 | SystemExit: 0 154 | 155 | Another nice feature of exceptions is exception hierarchies. 156 | Exceptions are just classes that derive from ``Exception``, and you 157 | can catch exceptions based on their base classes. 
So, for example, 158 | you can catch most standard errors by catching the StandardError 159 | exception, from which e.g. IndexError inherits: 160 | 161 | >>> print issubclass(IndexError, StandardError) 162 | True 163 | 164 | >>> try: 165 | ... y = x[10] 166 | ... except StandardError: 167 | ... y = None 168 | 169 | You can also catch some exceptions more specifically than others. For 170 | example, sometimes you want to catch KeyboardInterrupt while ignoring 171 | all other exceptions (note that as of Python 2.5, KeyboardInterrupt inherits from BaseException rather than Exception): 172 | 173 | >>> try: 174 | ... # ... 175 | ... pass 176 | ... except KeyboardInterrupt: 177 | ... raise 178 | ... except Exception: 179 | ... pass 180 | 181 | Note that if you want to print out the error, you can coerce a string 182 | out of the exception to present to the user: 183 | 184 | >>> try: 185 | ... y = x[10] 186 | ... except Exception, e: 187 | ... print 'CAUGHT EXCEPTION!', str(e) 188 | CAUGHT EXCEPTION! list index out of range 189 | 190 | Last but not least, you can define your own exceptions and exception 191 | hierarchies: 192 | 193 | >>> class MyFavoriteException(Exception): 194 | ... pass 195 | >>> raise MyFavoriteException 196 | Traceback (most recent call last): 197 | ... 198 | MyFavoriteException 199 | 200 | I haven't used this much myself, but it is invaluable when you are writing 201 | packages that have a lot of different detailed exceptions that you might 202 | want to let users handle. 203 | 204 | (By default, I usually raise a simple Exception in my own code.) 205 | 206 | Oh, one more note: AssertionError. Remember assert? 207 | 208 | >>> assert 0 209 | Traceback (most recent call last): 210 | ... 211 | AssertionError 212 | 213 | Yep, it raises an AssertionError that you can catch, if you REALLY want to... 214 | 215 | Function Decorators 216 | -------------------- 217 | 218 | Function decorators are a strange beast that I tend to use only in my 219 | testing code and not in my actual application code. 
Briefly, function 220 | decorators are functions that take functions as arguments, and return 221 | other functions. Confused? Let's see a simple example that makes 222 | sure that no keyword argument named 'something' ever gets passed into 223 | a function: 224 | 225 | >>> def my_decorator(fn): 226 | ... 227 | ... def new_fn(*args, **kwargs): 228 | ... if 'something' in kwargs: 229 | ... print 'REMOVING', kwargs['something'] 230 | ... del kwargs['something'] 231 | ... return fn(*args, **kwargs) 232 | ... 233 | ... return new_fn 234 | 235 | To apply this decorator, use this funny @ syntax: 236 | 237 | >>> @my_decorator 238 | ... def some_function(a=5, b=6, something=None, c=7): 239 | ... print a, b, something, c 240 | 241 | OK, now ``some_function`` has been invisibly replaced with the result of 242 | ``my_decorator``, which is going to be ``new_fn``. Let's see the result: 243 | 244 | >>> some_function(something='MADE IT') 245 | REMOVING MADE IT 246 | 5 6 None 7 247 | 248 | Mind you, without the decorator, the function does exactly what you expect: 249 | 250 | >>> def some_function(a=5, b=6, something=None, c=7): 251 | ... print a, b, something, c 252 | >>> some_function(something='MADE IT') 253 | 5 6 MADE IT 7 254 | 255 | OK, so this is a bit weird. What possible uses are there for this?? 256 | 257 | Here are three example uses: 258 | 259 | First, synchronized functions like in Java. Suppose you had a bunch 260 | of functions (f1, f2, f3...) that could not be called concurrently, so 261 | you wanted to place locks around them. You could do this with decorators: 262 | 263 | >>> import threading 264 | >>> def synchronized(fn): 265 | ... lock = threading.Lock() 266 | ... 267 | ... def new_fn(*args, **kwargs): 268 | ... lock.acquire() 269 | ... print 'lock acquired' 270 | ... result = fn(*args, **kwargs) 271 | ... lock.release() 272 | ... print 'lock released' 273 | ... return result 274 | ... 275 | ... 
return new_fn 276 | 277 | and then when you define your functions, they will be locked: 278 | 279 | >>> @synchronized 280 | ... def f1(): 281 | ... print 'in f1' 282 | >>> f1() 283 | lock acquired 284 | in f1 285 | lock released 286 | 287 | Second, adding attributes to functions. (This is why I use them in my testing 288 | code sometimes.) 289 | 290 | >>> def attrs(**kwds): 291 | ... def decorate(f): 292 | ... for k in kwds: 293 | ... setattr(f, k, kwds[k]) 294 | ... return f 295 | ... return decorate 296 | 297 | >>> @attrs(versionadded="2.2", 298 | ... author="Guido van Rossum") 299 | ... def mymethod(f): 300 | ... pass 301 | 302 | >>> print mymethod.versionadded 303 | 2.2 304 | >>> print mymethod.author 305 | Guido van Rossum 306 | 307 | Third, memoize/caching of results. Here's a really simple example; you can 308 | find much more general ones online, in particular on the `Python Cookbook 309 | site `__. 310 | 311 | Imagine that you have a CPU-expensive one-parameter function: 312 | 313 | >>> def expensive(n): 314 | ... print 'IN EXPENSIVE', n 315 | ... # do something expensive here, like calculate n'th prime 316 | 317 | You could write a caching decorator to wrap this function and record 318 | results transparently: 319 | 320 | >>> def simple_cache(fn): 321 | ... cache = {} 322 | ... 323 | ... def new_fn(n): 324 | ... if n in cache: 325 | ... print 'FOUND IN CACHE; RETURNING' 326 | ... return cache[n] 327 | ... 328 | ... # otherwise, call function & record value 329 | ... val = fn(n) 330 | ... cache[n] = val 331 | ... return val 332 | ... 333 | ... return new_fn 334 | 335 | Then use this as a decorator to wrap the expensive function: 336 | 337 | >>> @simple_cache 338 | ... def expensive(n): 339 | ... print 'IN THE EXPENSIVE FN:', n 340 | ... 
return n**2 341 | 342 | Now, when you call this function twice with the same argument, it will 343 | only do the calculation once; the second time, the function call will be 344 | intercepted and the cached value will be returned. 345 | 346 | >>> expensive(55) 347 | IN THE EXPENSIVE FN: 55 348 | 3025 349 | >>> expensive(55) 350 | FOUND IN CACHE; RETURNING 351 | 3025 352 | 353 | Check out Michele Simionato's writeup of decorators `here 354 | `__ 355 | for lots more information on decorators. 356 | 357 | try/finally 358 | ----------- 359 | 360 | Finally, we come to try/finally! 361 | 362 | The syntax of try/finally is just like try/except: :: 363 | 364 | try: 365 | do_something() 366 | finally: 367 | do_something_else() 368 | 369 | The purpose of try/finally is to ensure that something is done, whether or 370 | not an exception is raised: 371 | 372 | >>> x = [0, 1, 2] 373 | >>> try: 374 | ... y = x[5] 375 | ... finally: 376 | ... x.append('something') 377 | Traceback (most recent call last): 378 | ... 379 | IndexError: list index out of range 380 | 381 | >>> print x 382 | [0, 1, 2, 'something'] 383 | 384 | (When an exception is raised, this behaves much like: 385 | 386 | >>> try: 387 | ... y = x[5] 388 | ... except IndexError: 389 | ... x.append('something') 390 | ... raise 391 | Traceback (most recent call last): 392 | ... 393 | IndexError: list index out of range 394 | 395 | but try/finally is cleaner: the exception doesn't have to be re-raised, you don't have to catch a specific exception type, 396 | and the ``finally`` block also runs when no exception is raised at all.) 397 | 398 | Well, why do you need this? Let's think about locking. First, get a lock: 399 | 400 | >>> import threading 401 | >>> lock = threading.Lock() 402 | 403 | Now, if you're locking something, you want to be darn sure to *release* 404 | that lock. But what if an exception is raised right in the middle? 405 | 406 | >>> def fn(): 407 | ... print 'acquiring lock' 408 | ... lock.acquire() 409 | ... y = x[5] 410 | ... print 'releasing lock' 411 | ... 
lock.release() 412 | >>> try: 413 | ... fn() 414 | ... except IndexError: 415 | ... pass 416 | acquiring lock 417 | 418 | Note that 'releasing lock' is never printed: 'lock' is now left in a 419 | locked state, and next time you run 'fn' you will hang the program 420 | forever. Oops. 421 | 422 | You can fix this with try/finally: 423 | 424 | >>> lock = threading.Lock() # gotta trash the previous lock, or hang! 425 | >>> def fn(): 426 | ... print 'acquiring lock' 427 | ... lock.acquire() 428 | ... try: 429 | ... y = x[5] 430 | ... finally: 431 | ... print 'releasing lock' 432 | ... lock.release() 433 | >>> try: 434 | ... fn() 435 | ... except IndexError: 436 | ... pass 437 | acquiring lock 438 | releasing lock 439 | 440 | Function arguments, and wrapping functions 441 | ------------------------------------------ 442 | 443 | You may have noticed above (in the section on decorators) that we wrapped 444 | functions using this notation: :: 445 | 446 | def wrapper_fn(*args, **kwargs): 447 | return fn(*args, **kwargs) 448 | 449 | (This takes the place of the old 'apply'.) What does this do? 450 | 451 | Here, \*args assigns all of the positional arguments to a tuple 452 | 'args', and '\*\*kwargs' assigns all of the keyword arguments to a 453 | dictionary 'kwargs': 454 | 455 | >>> def print_me(*args, **kwargs): 456 | ... print 'args is:', args 457 | ... print 'kwargs is:', kwargs 458 | 459 | >>> print_me(5, 6, 7, test='me', arg2=None) 460 | args is: (5, 6, 7) 461 | kwargs is: {'test': 'me', 'arg2': None} 462 | 463 | When a function is called with this notation, the args and kwargs are 464 | unpacked appropriately and passed into the function. For example, 465 | the function ``test_call`` 466 | 467 | >>> def test_call(a, b, c, x=1, y=2, z=3): 468 | ... 
print a, b, c, x, y, z 469 | 470 | can be called with a tuple of three args (matching 'a', 'b', 'c'): 471 | 472 | >>> tuple_in = (5, 6, 7) 473 | >>> test_call(*tuple_in) 474 | 5 6 7 1 2 3 475 | 476 | with some optional keyword args: 477 | 478 | >>> d = { 'x' : 'hello', 'y' : 'world' } 479 | >>> test_call(*tuple_in, **d) 480 | 5 6 7 hello world 3 481 | 482 | Incidentally, this lets you implement the 'dict' constructor in one 483 | line! 484 | 485 | >>> def dict_replacement(**kwargs): 486 | ... return kwargs 487 | -------------------------------------------------------------------------------- /publish/idiomatic-python.txt: -------------------------------------------------------------------------------- 1 | Idiomatic Python 2 | ================ 3 | 4 | Extracts from `The Zen of Python 5 | `__ by Tim Peters: 6 | 7 | - Beautiful is better than ugly. 8 | - Explicit is better than implicit. 9 | - Simple is better than complex. 10 | - Readability counts. 11 | 12 | (The whole Zen is worth reading...) 13 | 14 | The first step in programming is getting stuff to work at all. 15 | 16 | The next step in programming is getting stuff to work regularly. 17 | 18 | The step after that is reusing code and designing for reuse. 19 | 20 | Somewhere in there you will start writing idiomatic Python. 21 | 22 | Idiomatic Python is what you write when the *only* thing you're 23 | struggling with is the right way to solve *your* problem, and you're 24 | not struggling with the programming language or some weird library 25 | error or a nasty data retrieval issue or something else extraneous to 26 | your real problem. The idioms you prefer may differ from the idioms I 27 | prefer, but with Python there will be a fair amount of overlap, 28 | because there is usually at most one obvious way to do every task. (A 29 | caveat: "obvious" is unfortunately the eye of the beholder, to some 30 | extent.) 
31 | 32 | For example, let's consider the right way to keep track of the item number 33 | while iterating over a list. So, given a list z, 34 | 35 | >>> z = [ 'a', 'b', 'c', 'd' ] 36 | 37 | let's try printing out each item along with its index. 38 | 39 | You could use a while loop: 40 | 41 | >>> i = 0 42 | >>> while i < len(z): 43 | ... print i, z[i] 44 | ... i += 1 45 | 0 a 46 | 1 b 47 | 2 c 48 | 3 d 49 | 50 | or a for loop: 51 | 52 | >>> for i in range(0, len(z)): 53 | ... print i, z[i] 54 | 0 a 55 | 1 b 56 | 2 c 57 | 3 d 58 | 59 | but I think the clearest option is to use ``enumerate``: 60 | 61 | >>> for i, item in enumerate(z): 62 | ... print i, item 63 | 0 a 64 | 1 b 65 | 2 c 66 | 3 d 67 | 68 | Why is this the clearest option? Well, look at the Zen of Python extract 69 | above: it's explicit (we used ``enumerate``); it's simple; it's readable; 70 | and I would even argue that it's prettier than the while loop, if not 71 | exactly "beautiful". 72 | 73 | Python provides this kind of simplicity in as many places as possible, too. 74 | Consider file handles; did you know that they were iterable? 75 | 76 | >>> for line in file('data/listfile.txt'): 77 | ... print line.rstrip() 78 | a 79 | b 80 | c 81 | d 82 | 83 | Where Python really shines is that this kind of simple idiom -- in 84 | this case, iterables -- is very very easy not only to use but to 85 | *construct* in your own code. This will make your own code much more 86 | reusable, while improving code readability dramatically. And that's 87 | the sort of benefit you will get from writing idiomatic Python. 88 | 89 | Some basic data types 90 | --------------------- 91 | 92 | I'm sure you're all familiar with tuples, lists, and dictionaries, right? 93 | Let's do a quick tour nonetheless. 94 | 95 | 'tuples' are all over the place. 
For example, this code for swapping two 96 | numbers implicitly uses tuples: 97 | 98 | >>> a = 5 99 | >>> b = 6 100 | >>> a, b = b, a 101 | >>> print a == 6, b == 5 102 | True True 103 | 104 | That's about all I have to say about tuples. 105 | 106 | I use lists and dictionaries *all the time*. They're the two greatest 107 | inventions of mankind, at least as far as Python goes. With lists, 108 | it's just easy to keep track of stuff: 109 | 110 | >>> x = [] 111 | >>> x.append(5) 112 | >>> x.extend([6, 7, 8]) 113 | >>> x 114 | [5, 6, 7, 8] 115 | >>> x.reverse() 116 | >>> x 117 | [8, 7, 6, 5] 118 | 119 | It's also easy to sort. Consider this set of data: 120 | 121 | >>> y = [ ('IBM', 5), ('Zil', 3), ('DEC', 18) ] 122 | 123 | The ``sort`` method will run ``cmp`` on pairs of tuples, 124 | which sorts them on the first element of each tuple: 125 | 126 | >>> y.sort() 127 | >>> y 128 | [('DEC', 18), ('IBM', 5), ('Zil', 3)] 129 | 130 | Often it's handy to sort tuples on a different tuple element, and there 131 | are several ways to do that. I prefer to provide my own comparison function: 132 | 133 | >>> def sort_on_second(a, b): 134 | ... return cmp(a[1], b[1]) 135 | 136 | >>> y.sort(sort_on_second) 137 | >>> y 138 | [('Zil', 3), ('IBM', 5), ('DEC', 18)] 139 | 140 | Note that here I'm using the builtin ``cmp`` function (which is what ``sort`` 141 | uses by default: ``y.sort()`` is equivalent to ``y.sort(cmp)``) to do the 142 | comparison of the second part of the tuple. 143 | 144 | This kind of function is really handy for sorting dictionaries by 145 | value, as I'll show you below. 146 | 147 | (For a more in-depth discussion of sorting options, check out the 148 | `Sorting HowTo `__.) 149 | 150 | On to dictionaries! 
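One more sorting note before dictionaries: since Python 2.4, ``sort`` also accepts a ``key`` function, which extracts the value to compare and is called once per element (and in Python 3, where comparator arguments are gone, it is the only option). A sketch of the same second-element sort, in ``print()`` form so it runs under Python 2 and 3:

```python
# Sort tuples on their second element using a key function instead of
# a cmp-style comparator; the lambda pulls out the field to sort on.
y = [('IBM', 5), ('Zil', 3), ('DEC', 18)]
y.sort(key=lambda pair: pair[1])
print(y)  # prints [('Zil', 3), ('IBM', 5), ('DEC', 18)]
```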
151 | 152 | Your basic dictionary is just a hash table that takes keys and returns 153 | values: 154 | 155 | >>> d = {} 156 | >>> d['a'] = 5 157 | >>> d['b'] = 4 158 | >>> d['c'] = 18 159 | >>> d 160 | {'a': 5, 'c': 18, 'b': 4} 161 | >>> d['a'] 162 | 5 163 | 164 | You can also initialize a dictionary using the ``dict`` type to create 165 | a dict object: 166 | 167 | >>> e = dict(a=5, b=4, c=18) 168 | >>> e 169 | {'a': 5, 'c': 18, 'b': 4} 170 | 171 | Dictionaries have a few really neat features that I use pretty frequently. 172 | For example, let's collect (key, value) pairs where we potentially have 173 | multiple values for each key. That is, given a file containing this data, :: 174 | 175 | a 5 176 | b 6 177 | d 7 178 | a 2 179 | c 1 180 | 181 | suppose we want to keep all the values? If we just did it the simple way, 182 | 183 | >>> d = {} 184 | >>> for line in file('data/keyvalue.txt'): 185 | ... key, value = line.split() 186 | ... d[key] = int(value) 187 | 188 | we would lose all but the last value for each key: 189 | 190 | >>> d 191 | {'a': 2, 'c': 1, 'b': 6, 'd': 7} 192 | 193 | You can collect *all* the values by using ``get``: 194 | 195 | >>> d = {} 196 | >>> for line in file('data/keyvalue.txt'): 197 | ... key, value = line.split() 198 | ... l = d.get(key, []) 199 | ... l.append(int(value)) 200 | ... d[key] = l 201 | >>> d 202 | {'a': [5, 2], 'c': [1], 'b': [6], 'd': [7]} 203 | 204 | The key point here is that ``d.get(k, default)`` is equivalent to 205 | ``d[k]`` if ``d[k]`` already exists; otherwise, it returns ``default``. 206 | So, the first time each key is used, ``l`` is set to an empty list; 207 | the value is appended to this list, and then the value is set for that 208 | key. 209 | 210 | (There are tons of little tricks like the ones above, but these are the 211 | ones I use the most; see the Python Cookbook for an endless supply!) 212 | 213 | Now let's try combining some of the sorting stuff above with 214 | dictionaries. 
This time, our contrived problem is that we'd like to 215 | sort the keys in the dictionary ``d`` that we just loaded, but rather 216 | than sorting by key we want to sort by the sum of the values for each 217 | key. 218 | 219 | First, let's define a sort function: 220 | 221 | >>> def sort_by_sum_value(a, b): 222 | ... sum_a = sum(a[1]) 223 | ... sum_b = sum(b[1]) 224 | ... return cmp(sum_a, sum_b) 225 | 226 | Now apply it to the dictionary items: 227 | 228 | >>> items = d.items() 229 | >>> items 230 | [('a', [5, 2]), ('c', [1]), ('b', [6]), ('d', [7])] 231 | >>> items.sort(sort_by_sum_value) 232 | >>> items 233 | [('c', [1]), ('b', [6]), ('a', [5, 2]), ('d', [7])] 234 | 235 | and voila, you have your list of keys sorted by summed values! 236 | 237 | As I said, there are tons and tons of cute little tricks that you can 238 | do with dictionaries. I think they're incredibly powerful. 239 | 240 | .. @CTB invert dictionary 241 | 242 | List comprehensions 243 | ------------------- 244 | 245 | List comprehensions are neat little constructs that will shorten your 246 | lines of code considerably. Here's an example that constructs a list 247 | of squares between 0 and 4: 248 | 249 | >>> z = [ i**2 for i in range(0, 5) ] 250 | >>> z 251 | [0, 1, 4, 9, 16] 252 | 253 | You can also add in conditionals, like requiring only even numbers: 254 | 255 | >>> z = [ i**2 for i in range(0, 10) if i % 2 == 0 ] 256 | >>> z 257 | [0, 4, 16, 36, 64] 258 | 259 | The general form is :: 260 | 261 | [ expression for var in list if conditional ] 262 | 263 | so pretty much anything you want can go in ``expression`` and ``conditional``. 264 | 265 | I find list comprehensions to be very useful for both file parsing and 266 | for simple math. 
Consider a file containing data and comments: :: 267 | 268 | # this is a comment or a header 269 | 1 270 | # another comment 271 | 2 272 | 273 | where you want to read in the numbers only: 274 | 275 | >>> data = [ int(x) for x in open('data/commented-data.txt') if x[0] != '#' ] 276 | >>> data 277 | [1, 2] 278 | 279 | This is short, simple, and very explicit! 280 | 281 | For simple math, suppose you need to calculate the average and stddev of 282 | some numbers. Just use a list comprehension: 283 | 284 | >>> import math 285 | >>> data = [ 1, 2, 3, 4, 5 ] 286 | >>> average = sum(data) / float(len(data)) 287 | >>> stddev = sum([ (x - average)**2 for x in data ]) / float(len(data)) 288 | >>> stddev = math.sqrt(stddev) 289 | >>> print average, '+/-', stddev 290 | 3.0 +/- 1.41421356237 291 | 292 | Oh, and one rule of thumb: if your list comprehension is longer than 293 | one line, change it to a for loop; it will be easier to read, and easier 294 | to understand. 295 | 296 | Building your own types 297 | ----------------------- 298 | 299 | Most people should be pretty familiar with basic classes. 300 | 301 | >>> class A: 302 | ... def __init__(self, item): 303 | ... self.item = item 304 | ... def hello(self): 305 | ... print 'hello,', self.item 306 | 307 | >>> x = A('world') 308 | >>> x.hello() 309 | hello, world 310 | 311 | There are a bunch of neat things you can do with classes, but one of 312 | the neatest is building new types that can be used with standard 313 | Python list/dictionary idioms. 314 | 315 | For example, let's consider a basic binning class. 316 | 317 | >>> class Binner: 318 | ... def __init__(self, binwidth, binmax): 319 | ... self.binwidth, self.binmax = binwidth, binmax 320 | ... nbins = int(binmax / float(binwidth) + 1) 321 | ... self.bins = [0] * nbins 322 | ... 323 | ... def add(self, value): 324 | ... bin = value / self.binwidth 325 | ... 
self.bins[bin] += 1 326 | 327 | This behaves as you'd expect: 328 | 329 | >>> binner = Binner(5, 20) 330 | >>> for i in range(0,20): 331 | ... binner.add(i) 332 | >>> binner.bins 333 | [5, 5, 5, 5, 0] 334 | 335 | ...but wouldn't it be nice to be able to write this? :: 336 | 337 | for i in range(0, len(binner)): 338 | print i, binner[i] 339 | 340 | or even this? :: 341 | 342 | for i, bin in enumerate(binner): 343 | print i, bin 344 | 345 | This is actually quite easy, if you make the ``Binner`` class look like a 346 | list by adding two special functions: 347 | 348 | >>> class Binner: 349 | ... def __init__(self, binwidth, binmax): 350 | ... self.binwidth, self.binmax = binwidth, binmax 351 | ... nbins = int(binmax / float(binwidth) + 1) 352 | ... self.bins = [0] * nbins 353 | ... 354 | ... def add(self, value): 355 | ... bin = value / self.binwidth 356 | ... self.bins[bin] += 1 357 | ... 358 | ... def __getitem__(self, index): 359 | ... return self.bins[index] 360 | ... 361 | ... def __len__(self): 362 | ... return len(self.bins) 363 | 364 | >>> binner = Binner(5, 20) 365 | >>> for i in range(0,20): 366 | ... binner.add(i) 367 | 368 | and now we can treat ``Binner`` objects as normal lists: 369 | 370 | >>> for i in range(0, len(binner)): 371 | ... print i, binner[i] 372 | 0 5 373 | 1 5 374 | 2 5 375 | 3 5 376 | 4 0 377 | 378 | >>> for n in binner: 379 | ... print n 380 | 5 381 | 5 382 | 5 383 | 5 384 | 0 385 | 386 | In the case of ``len(binner)``, Python knows to use the special method 387 | ``__len__``, and likewise ``binner[i]`` just calls ``__getitem__(i)``. 388 | 389 | The second case involves a bit more implicit magic. Here, Python figures 390 | out that ``Binner`` can act like a list and simply calls the right functions 391 | to retrieve the information. 
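As an aside, because ``Binner`` supports the sequence protocol -- ``__len__`` plus a ``__getitem__`` that raises ``IndexError`` past the end -- builtins like ``list``, ``sum``, and ``max`` work on it directly, too. Here's a condensed, self-contained sketch (with the bin index forced to ``int``, since the original relies on Python 2 integer division):

```python
class Binner:
    def __init__(self, binwidth, binmax):
        self.binwidth, self.binmax = binwidth, binmax
        nbins = int(binmax / float(binwidth) + 1)
        self.bins = [0] * nbins

    def add(self, value):
        # int() keeps the bin index integral under true division
        self.bins[int(value / self.binwidth)] += 1

    def __getitem__(self, index):
        return self.bins[index]  # raises IndexError past the end

    def __len__(self):
        return len(self.bins)

binner = Binner(5, 20)
for i in range(0, 20):
    binner.add(i)

# builtins that expect a sequence all work via __getitem__/__len__:
print(list(binner))   # the bin counts, as a real list
print(sum(binner))    # total number of binned values
print(max(binner))    # largest bin count
```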
392 | 393 | Note that making your own read-only dictionaries is pretty simple, too: 394 | just provide the ``__getitem__`` function, which is called for non-integer 395 | values as well: 396 | 397 | >>> class SillyDict: 398 | ... def __getitem__(self, key): 399 | ... print 'key is', key 400 | ... return key 401 | >>> sd = SillyDict() 402 | >>> x = sd['hello, world'] 403 | key is hello, world 404 | >>> x 405 | 'hello, world' 406 | 407 | You can also write your own mutable types, e.g. 408 | 409 | >>> class SillyDict: 410 | ... def __setitem__(self, key, value): 411 | ... print 'setting', key, 'to', value 412 | >>> sd = SillyDict() 413 | >>> sd[5] = 'world' 414 | setting 5 to world 415 | 416 | but I have found this to be less useful in my own code, where I'm 417 | usually writing special objects like the ``Binner`` type above: I 418 | prefer to specify my own methods for putting information *into* the 419 | object type, because it reminds me that it is not a generic Python 420 | list or dictionary. However, the use of ``__getitem__`` (and some of 421 | the iterator and generator features I discuss below) can make code *much* 422 | more readable, and so I use them whenever I think the meaning will be 423 | unambiguous. For example, with the ``Binner`` type, the purpose of 424 | ``__getitem__`` and ``__len__`` is not very ambiguous, while the 425 | purpose of a ``__setitem__`` function (to support ``binner[x] = y``) 426 | would be unclear. 427 | 428 | Overall, the creation of your own custom list and dict types is one 429 | way to make reusable code that will fit nicely into Python's natural 430 | idioms. In turn, this can make your code look much simpler and feel 431 | much cleaner. The risk, of course, is that you will also make your 432 | code harder to understand and (if you're not careful) harder to debug. 433 | Mediating between these options is mostly a matter of experience. 434 | 435 | .. 
@CTB __getattr__ trick 436 | 437 | Iterators 438 | --------- 439 | 440 | Iterators are another built-in Python feature; unlike the list and 441 | dict types we discussed above, an iterator isn't really a *type*, but 442 | a *protocol*. This just means that Python agrees to respect anything 443 | that supports a particular set of methods as if it were an iterator. 444 | (These protocols appear everywhere in Python; we were taking advantage 445 | of the mapping and sequence protocols above, when we defined 446 | ``__getitem__`` and ``__len__``, respectively.) 447 | 448 | Iterators are more general versions of the sequence protocol; here's an 449 | example: 450 | 451 | >>> class SillyIter: 452 | ... i = 0 453 | ... n = 5 454 | ... def __iter__(self): 455 | ... return self 456 | ... def next(self): 457 | ... self.i += 1 458 | ... if self.i > self.n: 459 | ... raise StopIteration 460 | ... return self.i 461 | 462 | >>> si = SillyIter() 463 | >>> for i in si: 464 | ... print i 465 | 1 466 | 2 467 | 3 468 | 4 469 | 5 470 | 471 | Here, ``__iter__`` just returns ``self``, an object that has the 472 | function ``next()``, which (when called) either returns a value or 473 | raises a StopIteration exception. 474 | 475 | We've actually already met several iterators in disguise; in particular, 476 | ``enumerate`` is an iterator. To drive home the point, here's a simple 477 | reimplementation of ``enumerate``: 478 | 479 | >>> class my_enumerate: 480 | ... def __init__(self, some_iter): 481 | ... self.some_iter = iter(some_iter) 482 | ... self.count = -1 483 | ... 484 | ... def __iter__(self): 485 | ... return self 486 | ... 487 | ... def next(self): 488 | ... val = self.some_iter.next() 489 | ... self.count += 1 490 | ... return self.count, val 491 | >>> for n, val in my_enumerate(['a', 'b', 'c']): 492 | ... 
print n, val 493 | 0 a 494 | 1 b 495 | 2 c 496 | 497 | You can also iterate through an iterator the "old-fashioned" way: 498 | 499 | >>> some_iter = iter(['a', 'b', 'c']) 500 | >>> while 1: 501 | ... try: 502 | ... print some_iter.next() 503 | ... except StopIteration: 504 | ... break 505 | a 506 | b 507 | c 508 | 509 | but that would be silly in most situations! I use this if I just want 510 | to get the first value or two from an iterator. 511 | 512 | With iterators, one thing to watch out for is the return of ``self`` from 513 | the ``__iter__`` function. You can all too easily write an iterator that 514 | isn't as re-usable as you think it is. For example, suppose you had 515 | the following class: 516 | 517 | >>> class MyTrickyIter: 518 | ... def __init__(self, thelist): 519 | ... self.thelist = thelist 520 | ... self.index = -1 521 | ... 522 | ... def __iter__(self): 523 | ... return self 524 | ... 525 | ... def next(self): 526 | ... self.index += 1 527 | ... if self.index < len(self.thelist): 528 | ... return self.thelist[self.index] 529 | ... raise StopIteration 530 | 531 | This works just like you'd expect as long as you create a new object each 532 | time: 533 | 534 | >>> for i in MyTrickyIter(['a', 'b']): 535 | ... for j in MyTrickyIter(['a', 'b']): 536 | ... print i, j 537 | a a 538 | a b 539 | b a 540 | b b 541 | 542 | but it will break if you create the object just once: 543 | 544 | >>> mi = MyTrickyIter(['a', 'b']) 545 | >>> for i in mi: 546 | ... for j in mi: 547 | ... print i, j 548 | a b 549 | 550 | because self.index is incremented in each loop. 551 | 552 | Generators 553 | ---------- 554 | 555 | Generators are a Python implementation of `coroutines 556 | `__. Essentially, they're 557 | functions that let you suspend execution and return a result: 558 | 559 | >>> def g(): 560 | ... for i in range(0, 5): 561 | ... yield i**2 562 | >>> for i in g(): 563 | ... 
print i 564 | 0 565 | 1 566 | 4 567 | 9 568 | 16 569 | 570 | You could do this with a list just as easily, of course: 571 | 572 | >>> def h(): 573 | ... return [ x ** 2 for x in range(0, 5) ] 574 | >>> for i in h(): 575 | ... print i 576 | 0 577 | 1 578 | 4 579 | 9 580 | 16 581 | 582 | But you can do things with generators that you couldn't do with finite 583 | lists. Consider two full implementations of Eratosthenes' Sieve for 584 | finding prime numbers, below. 585 | 586 | First, let's define some boilerplate code that can be used by either 587 | implementation: 588 | 589 | >>> def divides(primes, n): 590 | ... for trial in primes: 591 | ... if n % trial == 0: return True 592 | ... return False 593 | 594 | Now, let's write a simple sieve with a generator: 595 | 596 | >>> def prime_sieve(): 597 | ... p, current = [], 1 598 | ... while 1: 599 | ... current += 1 600 | ... if not divides(p, current): # if any previous primes divide, cancel 601 | ... p.append(current) # this is prime! save & return 602 | ... yield current 603 | 604 | This implementation will find (within the limitations of Python's math 605 | functions) all prime numbers; the programmer has to stop it herself: 606 | 607 | >>> for i in prime_sieve(): 608 | ... print i 609 | ... if i > 10: 610 | ... break 611 | 2 612 | 3 613 | 5 614 | 7 615 | 11 616 | 617 | So, here we're using a generator to implement the generation of an 618 | infinite series with a single function definition. To do the equivalent 619 | with an iterator would require a class, so that the object instance can 620 | hold the variables: 621 | 622 | >>> class iterator_sieve: 623 | ... def __init__(self): 624 | ... self.p, self.current = [], 1 625 | ... def __iter__(self): 626 | ... return self 627 | ... def next(self): 628 | ... while 1: 629 | ... self.current = self.current + 1 630 | ... if not divides(self.p, self.current): 631 | ... self.p.append(self.current) 632 | ...
return self.current 633 | 634 | >>> for i in iterator_sieve(): 635 | ... print i 636 | ... if i > 10: 637 | ... break 638 | 2 639 | 3 640 | 5 641 | 7 642 | 11 643 | 644 | It is also *much* easier to write routines like ``enumerate`` as a 645 | generator than as an iterator: 646 | 647 | >>> def gen_enumerate(some_iter): 648 | ... count = 0 649 | ... for val in some_iter: 650 | ... yield count, val 651 | ... count += 1 652 | 653 | >>> for n, val in gen_enumerate(['a', 'b', 'c']): 654 | ... print n, val 655 | 0 a 656 | 1 b 657 | 2 c 658 | 659 | Abstruse note: we don't even have to catch ``StopIteration`` here, because 660 | the for loop simply ends when ``some_iter`` is done! 661 | 662 | assert 663 | ------ 664 | 665 | One of the most underused keywords in Python is ``assert``. Assert is 666 | pretty simple: it takes a boolean, and if the boolean evaluates to 667 | False, it fails (by raising an AssertionError exception). ``assert True`` 668 | is a no-op. 669 | 670 | >>> assert True 671 | >>> assert False 672 | Traceback (most recent call last): 673 | ... 674 | AssertionError 675 | 676 | You can also put an optional message in: 677 | 678 | >>> assert False, "you can't do that here!" 679 | Traceback (most recent call last): 680 | ... 681 | AssertionError: you can't do that here! 682 | 683 | ``assert`` is very, very useful for making sure that code is behaving 684 | according to your expectations during development. Worried that 685 | you're getting an empty list? ``assert len(x)``. Want to make sure 686 | that a particular return value is not None? ``assert retval is not 687 | None``. 688 | 689 | Also note that 'assert' statements are removed from optimized code, so only 690 | use them to check conditions during development, and make sure that 691 | the statement you're evaluating has no side effects. For example, 692 | 693 | >>> a = 1 694 | >>> def check_something(): 695 | ... global a 696 | ... a = 5 697 | ...
return True 698 | >>> assert check_something() 699 | 700 | will behave differently when run under optimization than when run without 701 | optimization, because the ``assert`` line will be removed completely from 702 | optimized code. 703 | 704 | If you need to raise an exception in production code, see below. The 705 | quickest and dirtiest way is to just "raise Exception", but that's kind 706 | of non-specific ;). 707 | 708 | Conclusions 709 | ----------- 710 | 711 | Use of common Python idioms -- both in your Python code and for your 712 | new types -- leads to short, sweet programs. 713 | -------------------------------------------------------------------------------- /publish/increasing-performance.txt: -------------------------------------------------------------------------------- 1 | Measuring and Increasing Performance 2 | ==================================== 3 | 4 | "Premature optimization is the root of all evil (or at least most of 5 | it) in programming." -- Donald Knuth 6 | 7 | In other words, know thy code! The only way to find performance 8 | bottlenecks is to profile your code. Unfortunately, the situation is 9 | a bit more complex in Python than you would like it to be: see 10 | http://docs.python.org/lib/profile.html. Briefly, there are three 11 | (!?) standard profiling systems that come with Python: profile, 12 | cProfile (only since Python 2.5!), and hotshot (though note that 13 | profile and cProfile are Python and C implementations of the same 14 | API). There is also a separately maintained one called statprof, which 15 | I nominally maintain. 16 | 17 | The ones included with Python are deterministic profilers, while 18 | statprof is a statistical profiler. What's the difference?
To steal 19 | from the Python docs: 20 | 21 | Deterministic profiling is meant to reflect the fact that all function 22 | call, function return, and exception events are monitored, and precise 23 | timings are made for the intervals between these events (during which 24 | time the user's code is executing). In contrast, statistical profiling 25 | randomly samples the effective instruction pointer, and deduces where 26 | time is being spent. The latter technique traditionally involves less 27 | overhead (as the code does not need to be instrumented), but provides 28 | only relative indications of where time is being spent. 29 | 30 | Let's go to the examples. Suppose we have two functions 'count1' 31 | and 'count2', and we want to run both and see where time is spent. 32 | 33 | ----- 34 | 35 | Here's some example hotshot code: :: 36 | 37 | import hotshot, hotshot.stats 38 | prof = hotshot.Profile('hotshot.prof') 39 | prof.runcall(count1) 40 | prof.runcall(count2) 41 | prof.close() 42 | stats = hotshot.stats.load('hotshot.prof') 43 | stats.sort_stats('time', 'calls') 44 | stats.print_stats(20) 45 | 46 | and the resulting output: :: 47 | 48 | 2 function calls in 5.769 CPU seconds 49 | 50 | Ordered by: internal time, call count 51 | 52 | ncalls tottime percall cumtime percall filename:lineno(function) 53 | 1 4.335 4.335 4.335 4.335 count.py:8(count2) 54 | 1 1.434 1.434 1.434 1.434 count.py:1(count1) 55 | 0 0.000 0.000 profile:0(profiler) 56 | 57 | ----- 58 | 59 | Here's some example cProfile code: :: 60 | 61 | def runboth(): 62 | count1() 63 | count2() 64 | 65 | import cProfile, pstats 66 | cProfile.run('runboth()', 'cprof.out') 67 | 68 | p = pstats.Stats('cprof.out') 69 | p.sort_stats('time').print_stats(10) 70 | 71 | and the resulting output: :: 72 | 73 | Wed Jun 13 00:11:55 2007 cprof.out 74 | 75 | 7 function calls in 5.643 CPU seconds 76 | 77 | Ordered by: internal time 78 | 79 | ncalls tottime percall cumtime percall filename:lineno(function) 80 | 1 3.817 3.817 
4.194 4.194 count.py:8(count2) 81 | 1 1.282 1.282 1.450 1.450 count.py:1(count1) 82 | 2 0.545 0.272 0.545 0.272 {range} 83 | 1 0.000 0.000 5.643 5.643 run-cprofile:8(runboth) 84 | 1 0.000 0.000 5.643 5.643 :1() 85 | 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects} 86 | 87 | ----- 88 | 89 | And here's an example of statprof, the statistical profiler: :: 90 | 91 | import statprof 92 | statprof.start() 93 | count1() 94 | count2() 95 | statprof.stop() 96 | statprof.display() 97 | 98 | And the output: :: 99 | 100 | % cumulative self 101 | time seconds seconds name 102 | 74.66 4.10 4.10 count.py:8:count2 103 | 25.34 1.39 1.39 count.py:1:count1 104 | 0.00 5.49 0.00 run-statprof:2: 105 | --- 106 | Sample count: 296 107 | Total time: 5.490000 seconds 108 | 109 | Which profiler should you use? 110 | ------------------------------ 111 | 112 | statprof used to report more accurate numbers than hotshot or 113 | cProfile, because hotshot and cProfile had to instrument the code 114 | (insert tracing statements, basically). However, the numbers shown 115 | above are pretty similar to each other and I'm not sure there's much 116 | of a reason to choose between them any more. So, I recommend starting 117 | with cProfile, because it's the officially supported one. 118 | 119 | One note -- none of these profilers really work all that well with 120 | threads, for a variety of reasons. You're best off doing performance 121 | measurements on non-threaded code. 
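By the way, the ``count1``/``count2`` functions being profiled above aren't shown in these notes; the following self-contained sketch uses hypothetical stand-ins for them, and drives cProfile through its in-process ``Profile`` object instead of writing a stats file to disk:

```python
import cProfile
import io
import pstats

# hypothetical stand-ins for the count1/count2 profiled above
def count1():
    total = 0
    for i in range(200000):
        total += i
    return total

def count2():
    total = 0
    for i in range(400000):
        total += i * i
    return total

prof = cProfile.Profile()
prof.enable()
count1()
count2()
prof.disable()

# sort by internal time and print the top entries to a string
stream = io.StringIO()
pstats.Stats(prof, stream=stream).sort_stats('time').print_stats(5)
print(stream.getvalue())
```

(This sketch is written for a modern Python; under the Python 2.5 these notes target, you'd use ``StringIO.StringIO`` instead of ``io.StringIO``.)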
122 | 123 | Measuring code snippets with timeit 124 | ----------------------------------- 125 | 126 | There's also a simple timing tool called timeit: :: 127 | 128 | from timeit import Timer 129 | from count import * 130 | 131 | t1 = Timer("count1()", "from count import count1") 132 | print 'count1:', t1.timeit(number=1) 133 | 134 | t2 = Timer("count2()", "from count import count2") 135 | print 'count2:', t2.timeit(number=1) 136 | 137 | Speeding Up Python 138 | ================== 139 | 140 | There are a couple of options for speeding up Python. 141 | 142 | psyco 143 | ----- 144 | 145 | (Taken almost verbatim from the `psyco introduction `__!) 146 | 147 | psyco is a specializing compiler that lets you run your existing 148 | Python code much faster, with *absolutely no change* in your source 149 | code. It acts like a just-in-time compiler by rewriting several 150 | versions of your code blocks and then optimizing them by specializing 151 | the variables they use. 152 | 153 | The main benefit is that you get a 2-100x speed-up with an unmodified Python 154 | interpreter and unmodified source code. (You just need to import psyco.) 155 | 156 | The main drawbacks are that it only runs on i386-compatible processors 157 | (so, not PPC Macs) and it's a bit of a memory hog. 158 | 159 | For example, if you use the prime number generator code (see 160 | `Idiomatic Python `__) to generate all primes 161 | under 100000, it takes about 10.4 seconds on my development server. 162 | With psyco, it takes about 1.6 seconds (that's about a 6x speedup). 163 | Even when doing less numerical stuff, I see at least a 2x speedup. 164 | 165 | Installing psyco 166 | ~~~~~~~~~~~~~~~~ 167 | 168 | (Note: psyco is an extension module and does not come in pre-compiled 169 | form. Therefore, you will need to have a Python-compatible C compiler 170 | installed in order to install psyco.)
171 | 172 | Grab the latest psyco snapshot from here: :: 173 | 174 | http://psyco.sourceforge.net/psycoguide/sources.html 175 | 176 | unpack it, and run 'python setup.py install'. 177 | 178 | Using psyco 179 | ~~~~~~~~~~~ 180 | 181 | Put the following code at the top of your __main__ Python script: :: 182 | 183 | try: 184 | import psyco 185 | psyco.full() 186 | except ImportError: 187 | pass 188 | 189 | ...and you're done. (Yes, it's magic!) 190 | 191 | The only place where psyco won't help you much is when you have 192 | already recoded the CPU-intensive component of your code into an 193 | extension module. 194 | 195 | pyrex 196 | ----- 197 | 198 | pyrex is a Python-like language used to create C modules for Python. 199 | You can use it for two purposes: to increase performance by 200 | (re)writing your code in C (but with a friendly extension language), 201 | and to make C libraries available to Python. 202 | 203 | In the context of speeding things up, here's an example program: :: 204 | 205 | def primes(int maxprime): 206 | cdef int n, k, i 207 | cdef int p[100000] 208 | result = [] 209 | k = 0 210 | n = 2 211 | while n < maxprime: 212 | i = 0 213 | 214 | # test against previous primes 215 | while i < k and n % p[i] <> 0: 216 | i = i + 1 217 | 218 | # prime? if so, save. 
219 | if i == k: 220 | p[k] = n 221 | k = k + 1 222 | result.append(n) 223 | n = n + 1 224 | 225 | return result 226 | 227 | To compile this, you would execute: :: 228 | 229 | pyrexc primes.pyx 230 | gcc -c -fPIC -I /usr/local/include/python2.5 primes.c 231 | gcc -shared primes.o -o primes.so 232 | 233 | Or, more nicely, you can write a setup.py using some of the Pyrex 234 | helper functions: :: 235 | 236 | from distutils.core import setup 237 | from distutils.extension import Extension 238 | from Pyrex.Distutils import build_ext # <-- 239 | 240 | setup( 241 | name = "primes", 242 | ext_modules=[ 243 | Extension("primes", ["primes.pyx"], libraries = []) 244 | ], 245 | cmdclass = {'build_ext': build_ext} 246 | ) 247 | 248 | A few notes: 249 | 250 | - 'cdef' is a C definition statement 251 | - this is a "python-alike" language but not Python, per se ;) 252 | - pyrex does handle a lot of the nasty C extension stuff for you. 253 | 254 | There's an excellent guide to Pyrex available online here: 255 | http://ldots.org/pyrex-guide/. 256 | 257 | I haven't used Pyrex much myself, but I have a friend who swears by 258 | it. My concerns are that it's a "C/Python-alike" language but not C 259 | or Python, and I have already memorized too many weird rules about too 260 | many languages! 261 | 262 | We'll encounter Pyrex a bit further down the road in the context of 263 | linking existing C/C++ code into your own code. 264 | 265 | .. @CTB will we?? ;) 266 | -------------------------------------------------------------------------------- /publish/index.txt: -------------------------------------------------------------------------------- 1 | .. Intermediate and Advanced Software Carpentry documentation master file, created by 2 | sphinx-quickstart on Tue Jul 3 21:50:34 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Intermediate and Advanced Software Carpentry! 
7 | ======================================================== 8 | 9 | :Author: C Titus Brown 10 | :Date: June 18, 2007 11 | 12 | Welcome! You have stumbled upon the class handouts for a course I 13 | taught at Lawrence Livermore National Lab, June 12-June 14, 2007. 14 | 15 | These notes are intended to *accompany* my lecture, which was a 16 | demonstration of a variety of "intermediate" Python features and 17 | packages. Because the demonstration was interactive, these notes are 18 | not complete notes of what went on in the course. (Sorry about that; 19 | they *have* been updated from my actual handouts to be more 20 | complete...) 21 | 22 | However, all 70 pages are free to view and print, so enjoy. 23 | 24 | All errors are, of course, my own. Note that almost all of the 25 | examples starting with '>>>' are doctests, so you can take `the source 26 | `__ and run doctest on it to make sure I'm being honest. 27 | But do me a favor and run the doctests with Python 2.5 ;). 28 | 29 | Note that Day 1 of the course ran through the end of "Testing Your 30 | Software"; Day 2 ran through the end of "Online Resources for Python"; 31 | and Day 3 finished it off. 32 | 33 | Example code (mostly from the C extension sections) is available `here 34 | `__; see the `README `__ for more information. 35 | 36 | Contents: 37 | 38 | .. 
toctree:: 39 | :maxdepth: 2 40 | 41 | day1 42 | day2 43 | day3 44 | 45 | 46 | Indices and tables 47 | ================== 48 | 49 | * :ref:`genindex` 50 | * :ref:`modindex` 51 | * :ref:`search` 52 | 53 | -------------------------------------------------------------------------------- /publish/multiprocessing.txt: -------------------------------------------------------------------------------- 1 | Packages for Multiprocessing 2 | ============================ 3 | 4 | threading 5 | --------- 6 | 7 | Python has basic support for threading built in: for example, here's a 8 | program that runs two threads, each of which prints out messages after 9 | sleeping a particular amount of time: :: 10 | 11 | from threading import Thread, local 12 | import time 13 | 14 | class MessageThread(Thread): 15 | def __init__(self, message, sleep): 16 | self.message = message 17 | self.sleep = sleep 18 | Thread.__init__(self) # remember to run Thread init! 19 | 20 | def run(self): # automatically run by 'start' 21 | i = 0 22 | while i < 50: 23 | i += 1 24 | print i, self.message 25 | 26 | time.sleep(self.sleep) 27 | 28 | t1 = MessageThread("thread - 1", 1) 29 | t2 = MessageThread("thread - 2", 2) 30 | 31 | t1.start() 32 | t2.start() 33 | 34 | However, due to the existence of the Global Interpreter Lock (GIL) 35 | (http://docs.python.org/api/threads.html), CPU-intensive code will 36 | not run faster on dual-core CPUs than it will on single-core CPUs. 37 | 38 | Briefly, the idea is that the Python interpreter holds a global lock, 39 | and no Python code can be executed without holding that lock. (Code 40 | execution will still be interleaved, but no two Python instructions 41 | can execute at the same time.) Therefore, any Python code that you 42 | write (or GIL-naive C/C++ extension code) will not take advantage of 43 | multiple CPUs. 
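You can see this for yourself by timing a CPU-bound function run twice in a row versus in two threads; under the GIL, the threaded version comes out roughly as slow as the serial one. (The sketch below is illustrative only -- exact numbers will vary by machine.)

```python
import time
from threading import Thread

def burn():
    # pure-Python CPU-bound loop; the GIL serializes two of these
    n = 0
    for i in range(3000000):
        n += i

# run it twice, one after the other
start = time.time()
burn()
burn()
serial = time.time() - start

# run it twice, in two "parallel" threads
start = time.time()
threads = [Thread(target=burn), Thread(target=burn)]
for t in threads:
    t.start()
for t in threads:
    t.join()
threaded = time.time() - start

print('serial: %.2fs  threaded: %.2fs' % (serial, threaded))
```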
44 | 45 | This is intentional: 46 | 47 | http://mail.python.org/pipermail/python-3000/2007-May/007414.html 48 | 49 | There is a long history of wrangling about the GIL, and there are a couple 50 | of good arguments for it. Briefly, 51 | 52 | - it dramatically simplifies writing C extension code, because by 53 | default, C extension code does not need to know anything about 54 | threads. 55 | 56 | - putting in locks appropriately to handle places where contention 57 | might occur is not only error-prone but makes the code quite slow; 58 | locks really affect performance. 59 | 60 | - threaded code is difficult to debug, and most people don't need it, 61 | despite having been brainwashed to think that they do ;). 62 | 63 | But we don't care about that: *we* do want our code to run on multiple 64 | CPUs. So first, let's dip back into C code: what do we have to do to 65 | make our C code release the GIL so that it can do a long computation? 66 | 67 | Basically, just wrap I/O blocking code or CPU-intensive code in the 68 | following macros: :: 69 | 70 | Py_BEGIN_ALLOW_THREADS 71 | 72 | ...Do some time-consuming operation... 73 | 74 | Py_END_ALLOW_THREADS 75 | 76 | This is actually pretty easy to do to your C code, and it does result 77 | in that code being run in parallel on multi-core CPUs. (note: 78 | example?) 79 | 80 | The big problem with the GIL, however, is that it really means that you 81 | simply can't write parallel code in Python without jumping through some 82 | kind of hoop. Below, we discuss a couple of these hoops ;). 
Writing (and indicating) threadsafe C extensions
------------------------------------------------

Suppose you had some CPU-expensive C code: ::

  void waste_time() {
    int i, n = 0;
    for (i = 0; i < 1024*1024*1024; i++) {
      if ((i % 2) == 0) n++;
    }
  }

and you wrapped this in a Python function: ::

  PyObject * waste_time_fn(PyObject * self, PyObject * args) {
    waste_time();
    Py_RETURN_NONE;
  }

Now, left like this, any call to ``waste_time_fn`` will cause all
Python threads to block, waiting for ``waste_time`` to finish.  That's
silly, though -- ``waste_time`` is clearly threadsafe, because it uses
only local variables!

To tell Python that you are engaged in some expensive operations that
are threadsafe, just enclose the waste_time code like so: ::

  PyObject * waste_time_fn(PyObject * self, PyObject * args) {
    Py_BEGIN_ALLOW_THREADS

    waste_time();

    Py_END_ALLOW_THREADS
    Py_RETURN_NONE;
  }

This code will now be run in parallel when threading is used.  One
caveat: you can't do *any* call to the Python C API in the code
between the Py_BEGIN_ALLOW_THREADS and Py_END_ALLOW_THREADS, because
the Python C API is not threadsafe.

parallelpython
--------------

parallelpython is a system for controlling multiple Python processes on
multiple machines.
Here's an example program: ::

  #!/usr/bin/python
  def isprime(n):
      """Returns True if n is prime and False otherwise"""
      import math

      if n < 2:
          return False
      if n == 2:
          return True
      max = int(math.ceil(math.sqrt(n)))
      i = 2
      while i <= max:
          if n % i == 0:
              return False
          i += 1
      return True

  def sum_primes(n):
      """Calculates sum of all primes below given integer n"""
      return sum([x for x in xrange(2, n) if isprime(x)])

  ####

  import sys, time

  import pp
  # Creates jobserver with specified number of workers
  job_server = pp.Server(ncpus=int(sys.argv[1]))

  print "Starting pp with", job_server.get_ncpus(), "workers"

  start_time = time.time()

  # Submit a job of calculating sum_primes(100) for execution.
  #
  # * sum_primes - the function
  # * (input,) - tuple with arguments for sum_primes
  # * (isprime,) - tuple with functions on which sum_primes depends
  #
  # Execution starts as soon as one of the workers becomes available

  inputs = (100000, 100100, 100200, 100300, 100400, 100500, 100600, 100700)

  jobs = []
  for input in inputs:
      job = job_server.submit(sum_primes, (input,), (isprime,))
      jobs.append(job)

  for job, input in zip(jobs, inputs):
      print "Sum of primes below", input, "is", job()

  print "Time elapsed: ", time.time() - start_time, "s"
  job_server.print_stats()

If you add "ppservers=('host1',)" to the line ::

  pp.Server(...)

pp will check for parallelpython servers running on those other hosts and
send jobs to them as well.

The way parallelpython works is that it literally sends the Python code
across the network & evaluates it there!  It seems to work well.
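Since this course was written, process-based parallelism has also landed
in the standard library: Python 2.6's ``multiprocessing`` module covers
much the same ground as parallelpython for single-machine use.  Here is a
rough sketch of mine (not part of the original handout) of the same
prime-summing job using a worker pool; it sidesteps the GIL by running
one process per CPU:

```python
from multiprocessing import Pool

def isprime(n):
    """Returns True if n is prime and False otherwise."""
    if n < 2:
        return False
    i = 2
    while i * i <= n:
        if n % i == 0:
            return False
        i += 1
    return True

def sum_primes(n):
    """Calculates the sum of all primes below n."""
    return sum(x for x in range(2, n) if isprime(x))

if __name__ == '__main__':
    inputs = (10000, 10100, 10200, 10300)
    pool = Pool()                   # defaults to one worker per CPU
    results = pool.map(sum_primes, inputs)
    pool.close()
    pool.join()
    for n, total in zip(inputs, results):
        print("Sum of primes below %d is %d" % (n, total))
```

Unlike parallelpython, ``multiprocessing`` is single-machine only; for a
cluster you would still reach for pp or MPI.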
Rpyc
----

Rpyc is a remote procedure call system built in (and tailored to)
Python.  It is basically a way to transparently control remote Python
processes.  For example, here's some code that will connect to an Rpyc
server and ask the server to calculate the first 500 prime numbers: ::

   from Rpyc import SocketConnection

   # connect to the "remote" server
   c = SocketConnection("localhost")

   # make sure it has the right code in its path
   c.modules.sys.path.append('/u/t/dev/misc/rpyc')

   # tell it to execute 'primestuff.get_n_primes'
   primes = c.modules.primestuff.get_n_primes(500)
   print primes[-20:]

Note that this is a synchronous connection, so the client waits for the
result; you could also have it do the computation asynchronously, leaving
the client free to request results from other servers.

In terms of parallel computing, the server has to be controlled
directly, which makes it less than ideal.  I think parallelpython
is a better choice for straightforward number crunching.

pyMPI
-----

pyMPI is a nice Python implementation of the MPI (message-passing
interface) library.  MPI enables different processors to communicate
with each other.  I can't demo pyMPI, because I couldn't get it to
work on my other machine, but here's some example code that computes
pi to a precision of 1e-6 on however many machines you have running MPI.
:: 230 | 231 | import random 232 | import mpi 233 | 234 | def computePi(nsamples): 235 | rank, size = mpi.rank, mpi.size 236 | oldpi, pi, mypi = 0.0,0.0,0.0 237 | 238 | done = False 239 | while(not done): 240 | inside = 0 241 | for i in xrange(nsamples): 242 | x = random.random() 243 | y = random.random() 244 | if ((x*x)+(y*y)<1): 245 | inside+=1 246 | 247 | oldpi = pi 248 | mypi = (inside * 1.0)/nsamples 249 | pi = (4.0 / mpi.size) * mpi.allreduce(mypi, mpi.SUM) 250 | 251 | delta = abs(pi - oldpi) 252 | if(mpi.rank==0): 253 | print "pi:",pi," - delta:",delta 254 | if(delta < 0.00001): 255 | done = True 256 | return pi 257 | 258 | if __name__=="__main__": 259 | pi = computePi(10000) 260 | if(mpi.rank==0): 261 | print "Computed value of pi on",mpi.size,"processors is",pi 262 | 263 | One big problem with MPI is that documentation is essentially absent, but 264 | I can still make a few points ;). 265 | 266 | First, the "magic" happens in the 'allreduce' function up above, where 267 | it sums the results from all of the machines and then divides by the 268 | number of machines. 269 | 270 | Second, pyMPI takes the unusual approach of actually building an 271 | MPI-aware Python interpreter, so instead of running your scripts in 272 | normal Python, you run them using 'pyMPI'. 273 | 274 | multitask 275 | --------- 276 | 277 | multitask is not a multi-machine mechanism; it's a library that 278 | implements cooperative multitasking around I/O operations. Briefly, 279 | whenever you're going to do an I/O operation (like wait for more 280 | data from the network) you can tell multitask to yield to another 281 | thread of control. 
Here is a simple example where control is voluntarily
yielded after a 'print': ::

   import multitask

   def printer(message):
       while True:
           print message
           yield

   multitask.add(printer('hello'))
   multitask.add(printer('goodbye'))
   multitask.run()

Here's another example from the home page: ::

   import multitask

   def listener(sock):
       while True:
           conn, address = (yield multitask.accept(sock))    # WAIT
           multitask.add(client_handler(conn))

   def client_handler(sock):
       while True:
           request = (yield multitask.recv(sock, 1024))      # WAIT
           if not request:
               break
           response = handle_request(request)
           yield multitask.send(sock, response)              # WAIT

   multitask.add(listener(sock))
   multitask.run()

-------------------------------------------------------------------------------- /publish/new-style-classes.txt: --------------------------------------------------------------------------------

Idiomatic Python Take 3: new-style classes
==========================================

Someone (Lila) asked me a question about pickling and memory usage
that led me on a chase through Google, and along the way I was
reminded that new-style classes do have one or two interesting
points.

You may remember from the first day that there was a brief discussion
of new-style classes.  Basically, they're classes that inherit from
'object' explicitly:

>>> class N(object):
...    pass

and they have a bunch of neat features (covered in detail elsewhere).
I'm going to talk about two of them: __slots__ and descriptors.

__slots__ are a memory optimization.
As you know, you can assign any
attribute you want to an object:

>>> n = N()
>>> n.test = 'some string'
>>> print n.test
some string

Now, the way this is implemented behind the scenes is that there's a
dictionary hiding in 'n' (called 'n.__dict__') that holds all of the
attributes.  However, dictionaries consume a fair bit of memory above
and beyond their contents, so it might be good to get rid of the dictionary
in some circumstances and specify precisely what attributes a class has.

You can do that by creating a __slots__ entry:

>>> class M(object):
...    __slots__ = ['x', 'y', 'z']

Now objects of type 'M' will contain only enough space to hold those three
attributes, and nothing else.

A side consequence of this is that you can no longer assign to arbitrary
attributes, however!

>>> m = M()
>>> m.x = 5
>>> m.a = 10
Traceback (most recent call last):
   ...
AttributeError: 'M' object has no attribute 'a'

This will look strangely like some kind of type declaration to people
familiar with B&D languages, but I assure you that it is not!  You are
supposed to use __slots__ only for memory optimization...

Speaking of memory optimization (which is what got me onto this in the
first place) apparently using new-style classes and __slots__ both
dramatically decrease memory consumption:

http://mail.python.org/pipermail/python-list/2004-November/291840.html

http://mail.python.org/pipermail/python-list/2004-November/291986.html

Managed attributes
------------------

Another nifty pair of features in new-style classes is managed
attributes and descriptors.

You may know that in the olden days, you could intercept attribute access
by overwriting __getattr__:

>>> class A:
...    def __getattr__(self, name):
...        if name == 'special':
...            return 5
...        raise AttributeError(name)
>>> a = A()
>>> a.special
5

(Note that __getattr__ is only called when normal attribute lookup
fails, so the fallback should raise AttributeError; falling back to
``self.__dict__[name]``, as you sometimes see in old code, would always
raise the wrong exception, KeyError.)

This turns out to be kind of inefficient, because *every* failed
attribute lookup now goes through __getattr__.  Plus, it's a bit ugly
and it can lead to buggy code.

Python 2.2 introduced "managed attributes".  With managed attributes, you
can *define* functions that handle the get, set, and del operations for
an attribute:

>>> class B(object):
...    def _get_special(self):
...        return 5
...    special = property(_get_special)
>>> b = B()
>>> b.special
5

If you wanted to provide a '_set_special' function, you could do some
really bizarre things:

>>> class B(object):
...    def _get_special(self):
...        return 5
...    def _set_special(self, value):
...        print 'ignoring', value
...    special = property(_get_special, _set_special)
>>> b = B()

Now, retrieving the value of the 'special' attribute will give you '5',
no matter what you set it to:

>>> b.special
5
>>> b.special = 10
ignoring 10
>>> b.special
5

Ignoring the array of perverse uses you could apply, this is actually
useful -- for one example, you can now do internal consistency checking
on attributes, intercepting inconsistent values before they actually get
set.

Descriptors
-----------

Descriptors are a related feature that let you implement attribute access
functions in a different way.  First, let's define a read-only descriptor:

>>> class D(object):
...    def __get__(self, obj, type=None):
...        print 'in get:', obj
...        return 6

Now attach it to a class:

>>> class A(object):
...    val = D()

>>> a = A()
>>> a.val                               # doctest: +ELLIPSIS
in get: <...>
6

What happens is that 'a.val' is checked for the presence of a __get__
function, and if such exists, it is called instead of returning 'val'.
You can also do this with __set__ and __delete__:

>>> class D(object):
...    def __get__(self, obj, type=None):
...        print 'in get'
...        return 6
...
...    def __set__(self, obj, value):
...        print 'in set:', value
...
...    def __delete__(self, obj):
...        print 'in del'

>>> class A(object):
...    val = D()
>>> a = A()
>>> a.val = 15
in set: 15
>>> del a.val
in del
>>> print a.val
in get
6

This can actually give you control over things like the *types* of objects
that are assigned to particular classes: no mean thing.

-------------------------------------------------------------------------------- /publish/online-resources.txt: --------------------------------------------------------------------------------

Online Resources for Python
===========================

The obvious one: http://www.python.org/ (including, of course,
http://docs.python.org/).

The next most obvious one: comp.lang.python.announce, a.k.a. the
python-announce mailing list.
This is a low traffic list that is really quite handy; note especially
a brief summary of postings called "the Weekly Python-URL", which as
far as I can tell is only available on this list.

The Python Cookbook is chock full of useful recipes; some of them have
been extracted and prettified in the O'Reilly Python Cookbook book, but
they're all available through the Cookbook site.

The Daily Python-URL is distinct from the Weekly Python-URL; read it
at http://www.pythonware.com/daily/.  Postings vary from daily to weekly.
21 | 22 | http://planet.python.org and http://www.planetpython.org/ are Web sites 23 | that aggregate Python blogs (mine included, hint hint). Very much worth 24 | skimming over a coffee break. 25 | 26 | And, err, Google is a fantastic way to figure stuff out! 27 | -------------------------------------------------------------------------------- /publish/pyparsing-presentation.txt: -------------------------------------------------------------------------------- 1 | pyparsing 2 | ========= 3 | 4 | Basic pyparsing 5 | --------------- 6 | 7 | Matching text 8 | ~~~~~~~~~~~~~ 9 | 10 | >>> import pyparsing 11 | >>> from pyparsing import Word, printables, Literal, StringEnd, Optional 12 | >>> grammar = Literal("Hello,") + Word(printables) 13 | >>> print grammar.parseString("Hello, nurse!") 14 | ['Hello,', 'nurse!'] 15 | 16 | So that's easy enough. But here, we *know* that 'Hello' is going to be 17 | there -- we're really only interested in the word *after* 'Hello'. 18 | 19 | >>> grammar = Literal("Hello,").suppress() + Word(printables) 20 | >>> print grammar.parseString("Hello, nurse!") 21 | ['nurse!'] 22 | 23 | Let's break things down a bit: 24 | 25 | >>> article = Word(printables) 26 | >>> grammar = Literal("Hello,").suppress() + article 27 | >>> print grammar.parseString("Hello, nurse!") 28 | ['nurse!'] 29 | 30 | Wouldn't it be nice to give the article ("nurse!") a name? 31 | 32 | >>> article = Word(printables).setResultsName("the_article") 33 | >>> grammar = Literal("Hello,").suppress() + article 34 | >>> results = grammar.parseString("Hello, nurse!") 35 | 36 | Now, given this, you can do two things: you can either refer to the result 37 | as an element of a list, 38 | 39 | >>> print results[0] 40 | nurse! 41 | 42 | or by name: 43 | 44 | >>> print results.the_article 45 | nurse! 46 | 47 | This kind of naming is incredibly handy and it's one of the main reasons 48 | I chose pyparsing. 
For example, let's try out a more complicated example:

>>> article = Word(printables).setResultsName("the_article")
>>> salutation = (Literal("Hello,") | Literal("Goodbye,")).suppress()
>>> adjective = Word(printables).setResultsName('adjective')
>>> grammar = ((salutation + adjective + article) | \
...            (salutation + article)) + StringEnd()

This can match "Hello, nurse!":

>>> results_1 = grammar.parseString("Hello, nurse!")

as well as "Goodbye, cruel world!":

>>> results_2 = grammar.parseString("Goodbye, cruel world!")

but in *both* cases you can extract ``the_article`` by name:

>>> print results_1.the_article
nurse!
>>> print results_2.the_article
world!

And, of course, the ``adjective`` result is only set in the case where it
was matched:

>>> print results_1.adjective
<BLANKLINE>
>>> print results_2.adjective
cruel

Note that this was not a particularly good example; rather than writing
the grammar like so:

>>> grammar = ((salutation + adjective + article) | \
...            (salutation + article)) + StringEnd()

I could have written it like this:

>>> grammar = salutation + Optional(adjective) + article + StringEnd()

Interlude: whitespace drives tokenizing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

I've studiously avoided talking about removing that final '!' from
"Hello, nurse!" until now, and that's because it's not this simple: ::

   article = Word(printables) + "!"
   print article.parseString("nurse!")

You see, the '+' operator (a.k.a. ``pyparsing.And``) only joins *tokens*,
and pyparsing implicitly tokenizes on *whitespace*.  So *this* would work,

>>> article = Word(printables) + "!"
>>> print article.parseString("nurse !")
['nurse', '!']

because now you're parsing 'Word AND !'.
106 | 107 | (The only way I know of to remove that '!' is to use a parse action: 108 | 109 | >>> def remove_exclamation(x): 110 | ... return x[0].rstrip('!') 111 | >>> article = Word(printables) 112 | >>> article = article.setParseAction(remove_exclamation) 113 | >>> print article.parseString("nurse!") 114 | ['nurse'] 115 | 116 | More about parse actions later.) 117 | 118 | Bottom line: tokenizing on whitespace makes a lot of things easier, and 119 | some things harder; I guess it's a good thing... 120 | 121 | SkipTo 122 | ~~~~~~ 123 | 124 | Suppose you have an annoying section of text that you want to just 125 | jump past and not parse: 126 | 127 | >>> annoying = """ 128 | ... SOMETHING 129 | ... SOMETHING ELSE 130 | ... END 131 | ... MORE STUFF THAT MATTERS 132 | ... """ 133 | 134 | This is easily handled with SkipTo: 135 | 136 | >>> from pyparsing import SkipTo 137 | >>> end_marker = SkipTo("END", include=True).suppress() 138 | >>> (end_marker + "MORE STUFF THAT MATTERS").parseString(annoying) 139 | (['MORE STUFF THAT MATTERS'], {}) 140 | 141 | Regex matches 142 | ~~~~~~~~~~~~~ 143 | 144 | You can do regular expression matches too: 145 | 146 | >>> from pyparsing import Regex 147 | >>> hex_num = Regex("[0-9a-fA-F]+") 148 | >>> hex_num.parseString("1f") 149 | (['1f'], {}) 150 | 151 | Lists and more 152 | ~~~~~~~~~~~~~~ 153 | 154 | Suppose we want to allow matches to multiple hex numbers. We can do this: 155 | 156 | >>> from pyparsing import OneOrMore 157 | >>> multiple = OneOrMore(hex_num) 158 | >>> multiple.parseString('1f') 159 | (['1f'], {}) 160 | >>> multiple.parseString('1f 2f 3f') 161 | (['1f', '2f', '3f'], {}) 162 | 163 | Parse actions 164 | ~~~~~~~~~~~~~ 165 | 166 | Parse actions are functions that are run on parsed tokens; generally, 167 | the result of the parse action replaces the parsed token. For example, 168 | 169 | >>> def convert_hex(x): 170 | ... 
return eval('0x' + x[0]) 171 | >>> hex_num = hex_num.setParseAction(convert_hex).setResultsName('num') 172 | >>> result = hex_num.parseString('1f') 173 | >>> print result.num 174 | 31 175 | 176 | As you can see, this sort of parse function allows you to convert 177 | parse results into objects automagically (after all, there's no reason 178 | that ``convert_hex`` needs to return an integer; it could return an object 179 | of any type). 180 | 181 | Defining convenient and re-usable parse objects 182 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 183 | 184 | This brings us to a "putting it all together" moment: suppose that you 185 | have some kind of string that turns up throughout the string you're 186 | parsing. For the parser I was working on, one common string was an 187 | expectation value (i.e. a floating point number). This was no 188 | *ordinary* floating point number, though: it *could* start with 'e', which 189 | meant that you needed to prepend a '1'. For example, :: 190 | 191 | 1.0 ==> 1.0 192 | 1e-5 ==> 1e-5 193 | e-5 ==> 1e-5 194 | 195 | Well, the first obvious way to handle this is like so: 196 | 197 | >>> from pyparsing import Word, nums 198 | >>> e_val = Word(nums + "e-") 199 | >>> def convert_eval(x): 200 | ... e = x[0] 201 | ... if e.startswith('e'): e = '1' + e 202 | ... return float(e) 203 | >>> e_val = e_val.setParseAction(convert_eval) 204 | >>> e_val.parseString('e-5') 205 | ([1.0000000000000001e-05], {}) 206 | 207 | OK, that's acceptable, but ugly if you have lots of e_val's floating around. 208 | 209 | You could refine things by naming your expectation values when they occur: 210 | 211 | >>> e_val_1 = e_val.setResultsName('e_val_1') 212 | >>> e_val_2 = e_val.setResultsName('e_val_2') 213 | >>> results = (e_val_1 + e_val_2).parseString('1e-3 5.0') 214 | >>> results.e_val_1 215 | 0.001 216 | >>> results.e_val_2 217 | 5.0 218 | 219 | ...but that's still a lot of code. 
Here's the suggestion that Paul made 220 | to me when he first saw my hacked-together parser: 221 | 222 | >>> e_val = Word(nums + "e-") 223 | >>> def named_e_val(name): 224 | ... x = Word(nums + "e-").setParseAction(convert_eval).copy() 225 | ... x = x.setResultsName(name) 226 | ... return x 227 | 228 | Now I can just say 229 | 230 | >>> grammar = named_e_val('e_val_1') + named_e_val('e_val_2') 231 | >>> results = grammar.parseString('1e-3 5.0') 232 | >>> results.e_val_1 233 | 0.001 234 | >>> results.e_val_2 235 | 5.0 236 | 237 | Building a BLAST output parser 238 | ------------------------------ 239 | 240 | Now on to my real problem: building an output parser for NCBI BLAST. 241 | 242 | Briefly, NCBI BLAST is a very widely used sequence search algorithm, 243 | and it has a notoriously annoying set of output formats. The most 244 | annoying thing about the output is that only the human-intended 245 | output contains a full set of information, so you need to parse something 246 | that has been formatted for humans. 247 | 248 | Another annoying thing about the output format is that it changes regularly 249 | in subtle ways. This means that most BLAST parsers break at least once a 250 | year. 251 | 252 | Why did I choose pyparsing? 253 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 254 | 255 | I chose pyparsing for this project partly because I am using it for 256 | twill, and it worked quite well there. However, the main decision 257 | point was that I needed to make a *readable* and *maintainable* 258 | parser, so that down the road I wouldn't have to relearn all sorts of 259 | nasty syntax in order to update the parser when NCBI changed their 260 | output formats. pyparsing seemed to fit that bill. 261 | 262 | What kind of output do I need to deal with? 263 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 264 | 265 | So, how bad is the output? 
Well, here's an example: :: 266 | 267 | >ref|NP_598432.1| U2 small nuclear ribonucleoprotein auxiliary factor (U2AF) 2 [Mus 268 | musculus] 269 | Length = 306 270 | 271 | Score = 394 bits (1013), Expect = e-110 272 | Identities = 202/279 (72%), Positives = 222/279 (79%) 273 | Frame = -1 274 | 275 | Query: 888 FLNNQMKLAGLAQAPGNPVLAVQITWDKNFSSLEFRSVDETTQALAFDGIIFQGQSLKLR 709 276 | F N QM+L GL QAPGNPVLAVQI DKNF+ LEFRSVDETTQA+AFDGIIFQGQSLK+R 277 | Sbjct: 3 FFNAQMRLGGLTQAPGNPVLAVQINQDKNFAFLEFRSVDETTQAMAFDGIIFQGQSLKIR 62 278 | 279 | Query: 708 RPHDYQPLPGMSESPALHVPVGVVSTVVQDTPHKLFIGGLPSYLTDDQVKELLTSFGPLK 529 280 | RPHDYQPLPGMSE+P+++VP GVVSTVV D+ HKLFIGGLP+YL DDQVKELLTSFGPLK 281 | Sbjct: 63 RPHDYQPLPGMSENPSVYVP-GVVSTVVPDSAHKLFIGGLPNYLNDDQVKELLTSFGPLK 121 282 | 283 | Query: 528 AFNLVKDSATCFSKGYAFCEYADVNVTDQAIAGLNGMQLGDKKLIVQRASVGAKNANXXX 349 284 | AFNLVKDSAT SKGYAFCEY D+NVTDQAIAGLNGMQLGDKKL+VQRASVGAKNA 285 | Sbjct: 122 AFNLVKDSATGLSKGYAFCEYVDINVTDQAIAGLNGMQLGDKKLLVQRASVGAKNATLST 181 286 | 287 | Query: 348 XXXXXXXXXXPGLASSQVQHSGLPTEVLCLMNMVTPXXXXXXXXXXXXXXXXXXECGKYG 169 288 | PGL SSQVQ G PTEVLCLMNMV P EC KYG 289 | Sbjct: 182 INQTPVTLQVPGLMSSQVQMGGHPTEVLCLMNMVLPEELLDDEEYEEIVEDVRDECSKYG 241 290 | 291 | Query: 168 SVRSVEIPRPVNGLDIPGCGKIFVEFASLLDCQRAQQAL 52 292 | V+S+EIPRPV+G+++PGCGKIFVEF S+ DCQ+A Q L 293 | Sbjct: 242 LVKSIEIPRPVDGVEVPGCGKIFVEFTSVFDCQKAMQGL 280 294 | 295 | Lots of finicky things to parse in there, eh? 
Let's focus on the score: ::

  Score =  394 bits (1013), Expect = e-110
  Identities = 202/279 (72%), Positives = 222/279 (79%)
  Frame = -1

My first iteration
~~~~~~~~~~~~~~~~~~

Here's my first set of code: ::

   self.score = Literal("Score =").suppress() +
          Word(nums + ".").setParseAction(make_float).setResultsName('bits') +
          Literal("bits (").suppress() +
          Word(nums).setParseAction(make_int).setResultsName('bits_max') +
          Literal("),").suppress() +
          Word("Expect()" + nums).suppress() + Literal("=") +
          Word(e_val).setParseAction(make_float).setResultsName('expect') +
          Literal("Identities =").suppress() + identities +
          Optional(Literal("Positives =").suppress() + positives) +
          Optional(Literal("Gaps =").suppress() + gaps) +
          Optional((Literal("Frame =").suppress() +
                    Word(frame).setParseAction(make_int).setResultsName('frame1') +
                    Optional(Literal("/").suppress() +
                             Word(frame).setParseAction(make_int).setResultsName('frame2'))) |
                   (Literal("Strand =") + restOfLine))


What can I say?  It worked...

What it looked like after Paul's suggestions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

I sent the above on to Paul, and after some gagging, he sent me back a bunch
of suggestions.  I ended up with this: ::

   self.score = Literal("Score =") + named_float('bits') +
          "bits (" + named_int('bits_max') + ")," +
          Word("Expect()" + nums) + "=" + named_float('expect') +
          "Identities =" + identities +
          Optional("Positives =" + positives) +
          Optional("Gaps =" + gaps) +
          Optional(("Frame =" + named_frame('frame1') +
                    Optional("/" + named_frame('frame2'))) |
                   ("Strand =" + restOfLine))

This is clearly much friendlier to read!
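As a point of comparison (this aside is mine, not the original author's),
the same 'Score' line could be dissected with a plain standard-library
regular expression.  The pattern below is a hypothetical equivalent of
just the first line of the grammar, and it shows what you give up with
regexes: named sub-grammars that compose and can be tested separately.

```python
import re

# Hypothetical regex equivalent of the first 'Score' line only;
# (?:\(\d+\))? allows for forms like 'Expect(2) = ...'.
score_re = re.compile(
    r"Score\s+=\s+(?P<bits>[\d.]+)\s+bits\s+\((?P<bits_max>\d+)\),"
    r"\s+Expect(?:\(\d+\))?\s+=\s+(?P<expect>[\d.e+-]+)"
)

line = "Score =  394 bits (1013), Expect = e-110"
m = score_re.search(line)

bits = float(m.group('bits'))
bits_max = int(m.group('bits_max'))
expect = m.group('expect')
if expect.startswith('e'):        # BLAST drops the leading '1'
    expect = '1' + expect
expect = float(expect)

print("bits=%s bits_max=%d expect=%g" % (bits, bits_max, expect))
```

One big regex per block is workable, but it is exactly the kind of
write-only syntax the pyparsing version was chosen to avoid.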
342 | 343 | I would also like to note that this kind of refactoring is tricky to do 344 | without being able to test each subset of the parse grammar. pyparsing 345 | let me break the parse grammar down into subsets in a very nice and 346 | convenient way, which really helped with testing. 347 | 348 | A sort of conclusion 349 | ~~~~~~~~~~~~~~~~~~~~ 350 | 351 | My parsing code is basically a generator wrapped around pyparsing 352 | results; here's what the ``blastparser`` API looks like: :: 353 | 354 | for record in parse_file('blast_output.txt'): 355 | print '-', record.query_name, record.database.name 356 | for hit in record.hits: 357 | print '--', hit.subject_name, hit.subject_length 358 | print ' ', hit.total_score, hit.total_expect 359 | for submatch in hit: 360 | print submatch.expect, submatch.bits 361 | 362 | print submatch.query_sequence 363 | print submatch.alignment 364 | print submatch.subject_sequence 365 | 366 | Because I use parse actions to turn each block into an object, it's really 367 | a very thin layer. 368 | 369 | Future thoughts 370 | ~~~~~~~~~~~~~~~ 371 | 372 | Speed. Speed speed speed speed. How can I speed things up? 373 | 374 | Without profiling, I'm not sure where the bottlenecks are, and that 375 | should probably be my first step. Nonetheless, I'm planning to try 376 | out lazy evaluation, which would work something like this. 377 | 378 | First, define the block structure: 379 | 380 | >>> from pyparsing import SkipTo 381 | >>> complex_block = """ 382 | ... SOMETHING 383 | ... SOMETHING ELSE 384 | ... END 385 | ... MORE STUFF THAT MATTERS 386 | ... """ 387 | >>> end_marker = SkipTo("END", include=True).suppress() 388 | 389 | Build a grammar for the internal structure: 390 | 391 | >>> internal_grammar = "..." 392 | 393 | Define a lazy evaluation class: 394 | 395 | >>> class LazyParse: 396 | ... def __init__(self, text): 397 | ... self.text = text 398 | ... self.parsed = None 399 | ... def parse(self): 400 | ... if self.parsed: 401 | ... 
return self.parsed
...         self.parsed = internal_grammar.parseString(self.text)
...         return self.parsed

Then, set a parse action:

>>> def parse_complex_block(x):
...     return LazyParse(x[0])
>>> end_marker = end_marker.setParseAction(parse_complex_block)

and Bob's your uncle... but I haven't gotten all the mechanics worked out.

-------------------------------------------------------------------------------- /publish/python-30.txt: --------------------------------------------------------------------------------

Python 3.0
==========

What's coming in Python 3000 (a.k.a. Python 3.0)?

First of all, Python 3000 will be out sometime in 2008; large parts of
it have already been implemented.  It is simply an increment on the
current code base.

The biggest point is that Python 3000 will break backwards
compatibility, abruptly.  This is very unusual for Python, but is
necessary to get rid of some old cruft.  In general, Python has been
very good about updating slowly and incrementally without breaking
backwards compatibility very much; this approach is being abandoned
for the jump from 2.x to 3.x.

However, the actual impact of this is likely to be small.  There will
be a few expressions that no longer work -- for example, 'dict.has_key'
is being removed, because you can just do 'if key in dict' -- but a
lot of the changes are behind the scenes, e.g. functions that currently
return lists will return iterators (dict.iterkeys -> dict.keys).

The biggest impact on this audience (scientists & numerical people) is
probably that in Python 3.0, 6 / 5 will no longer be 1 -- integer
division becomes true division, so 6 / 5 will be 1.2 -- and <> is
being removed.

Where lots of existing code must be made Python 3k compatible, you will
be able to use an automated conversion tool.  This should "just work"
except for cases where there is ambiguity in intent.
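If you want to try the new division behavior before switching (an aside
of mine, not from the original handout), Python 2.2 and later already
support it via a __future__ import, and ``//`` spells the old
floor-division explicitly:

```python
from __future__ import division   # a no-op under Python 3.x

# true division: returns a float even for two ints
print(6 / 5)
# floor division: the old 2.x integer-division behavior
print(6 // 5)
```

Running this prints 1.2 and then 1, under either 2.x (with the import)
or 3.x.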
The most depressing aspect of Py3k (for me) is that the stdlib is not
being reorganized, but this does mean that most existing modules will
still be in the same place!

See David Mertz's blog for his summary of the changes.

-------------------------------------------------------------------------------- /publish/run-doctests.py: --------------------------------------------------------------------------------

#! /usr/bin/env python
import doctest
import sys

failures = 0
for filename in sys.argv[1:]:
    print '... running doctests on', filename
    failed, attempted = doctest.testfile(filename)
    failures += failed

if failures:
    sys.exit(1)

print '*** SUCCESS ***'

-------------------------------------------------------------------------------- /publish/testing-python.txt: --------------------------------------------------------------------------------

Testing Your Software
=====================

"Debugging is twice as hard as writing the code in the first place.
Therefore, if you write the code as cleverly as possible, you are, by
definition, not smart enough to debug it." -- Brian W. Kernighan.

Everyone tests their software to some extent, if only by running it
and trying it out (technically known as "smoke testing").  Most
programmers do a certain amount of exploratory testing, which involves
running through various functional paths in your code and seeing if
they work.

Systematic testing, however, is a different matter.  Systematic
testing simply cannot be done properly without a certain (large!)
amount of automation, because every change to the software means that
the software needs to be tested all over again.

Below, I will introduce you to some lower level automated testing
concepts, and show you how to use built-in Python constructs to start
writing tests.
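To give a flavor of where we're headed, here is about the smallest
automated test I can write (a sketch of mine, using nothing beyond the
built-in ``assert`` statement; ``median`` is a made-up example function):

```python
def median(values):
    """Return the median of a non-empty list of numbers."""
    data = sorted(values)
    n = len(data)
    if n % 2 == 1:
        return data[n // 2]
    # even-length input: average the two middle values
    return (data[n // 2 - 1] + data[n // 2]) / 2.0

def test_median():
    # assert what the code *should* do, for a few representative inputs
    assert median([3, 1, 2]) == 2
    assert median([1, 2, 3, 4]) == 2.5
    assert median([5]) == 5

test_median()
print("tests passed")
```

The point is that the checks run themselves: after any change to
``median``, one command re-verifies all recorded expectations.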
22 | 23 | An introduction to testing concepts 24 | ----------------------------------- 25 | 26 | There are several types of tests that are particularly useful to 27 | research programmers. *Unit tests* are tests for fairly small and 28 | specific units of functionality. *Functional tests* test entire 29 | functional paths through your code. *Regression tests* make sure 30 | that (within the resolution of your records) your program's output 31 | has not changed. 32 | 33 | All three types of tests are necessary in different ways. 34 | 35 | Regression tests tell you when unexpected changes in behavior occur, 36 | and can reassure you that your basic data processing is still working. 37 | For scientists, this is particularly important if you are trying to 38 | link past research results to new research results: if you can no 39 | longer replicate your original results with your updated code, then 40 | you must regard your code with suspicion, *unless* the changes are 41 | intentional. 42 | 43 | By contrast, both unit and functional tests tend to be *expectation* 44 | based. By this I mean that you use the tests to lay out what behavior 45 | you *expect* from your code, and write your tests so that they *assert* 46 | that those expectations are met. 47 | 48 | The difference between unit and functional tests is blurry in most 49 | actual implementations; unit tests tend to be much shorter and require 50 | less setup and teardown, while functional tests can be quite long. I 51 | like Kumar McMillan's distinction: functional tests tell you *when* 52 | your code is broken, while unit tests tell you *where* your code is 53 | broken. That is, because of the finer granularity of unit tests, a 54 | broken unit test can identify a particular piece of code as the source 55 | of an error, while functional tests merely tell you that a feature is 56 | broken. 57 | 58 | The doctest module 59 | ------------------ 60 | 61 | Let's start by looking at the doctest module. 
If you've been 62 | following along, you will be familiar with doctests, because I've been 63 | using them throughout this text! A doctest links code and behavior 64 | explicitly in a nice documentation format. Here's an example: 65 | 66 | >>> print 'hello, world' 67 | hello, world 68 | 69 | When doctest sees this in a docstring or in a file, it knows that it 70 | should execute the code after the '>>>' and compare the actual output 71 | of the code to the strings immediately following the '>>>' line. 72 | 73 | To execute doctests, you can use the doctest API that comes with 74 | Python: just type: :: 75 | 76 | import doctest 77 | doctest.testfile(textfile) 78 | 79 | or :: 80 | 81 | import doctest 82 | doctest.testmod(module) 83 | 84 | The doctest docs contain complete documentation for the module, but 85 | in general there are only a few things you need to know. 86 | 87 | First, for multi-line entries, use '...' instead of '>>>': 88 | 89 | >>> def func(): 90 | ... print 'hello, world' 91 | >>> func() 92 | hello, world 93 | 94 | Second, if you need to elide exception code, use '...': 95 | 96 | >>> raise Exception("some error occurred") 97 | Traceback (most recent call last): 98 | ... 99 | Exception: some error occurred 100 | 101 | More generally, you can use '...' to match arbitrary output, as long as 102 | you specify a doctest directive: 103 | 104 | >>> import random 105 | >>> print 'random number:', random.randint(0, 10) # doctest: +ELLIPSIS 106 | random number: ... 107 | 108 | Third, doctests are terminated with a blank line, so if you explicitly 109 | expect a blank line, you need to use the special marker '<BLANKLINE>': 110 | 111 | >>> print '' 112 | <BLANKLINE> 113 | 114 | To test out some doctests of your own, try modifying these files 115 | and running them with ``doctest.testfile``. 116 | 117 | Doctests are useful in a number of ways. They encourage a kind of 118 | conversation with the user, in which you (the author) demonstrate how 119 | to actually use the code.
And, because they're executable, they 120 | ensure that your code works as you expect. However, they can also 121 | result in quite long docstrings, so I recommend putting long doctests 122 | in files separate from the code files. Short doctests can go anywhere 123 | -- in module, class, or function docstrings. 124 | 125 | Unit tests with unittest 126 | ------------------------ 127 | 128 | If you've heard of automated testing, you've almost certainly heard of 129 | unit tests. The idea behind unit tests is that you can constrain the 130 | behavior of small units of code to be correct by testing the bejeezus 131 | out of them; and, if your smallest code units are broken, then how can 132 | code built on top of them be good? 133 | 134 | The `unittest module `__ 135 | comes with Python. It provides a framework for writing and running 136 | unit tests that is at least convenient, if not as simple as it could be 137 | (see the 'nose' stuff, below, for something that is simpler). 138 | 139 | Unit tests are almost always demonstrated with some sort of numerical 140 | process, and I will be no different. Here's a simple unit test, using 141 | the unittest module: :: 142 | 143 | test_sort.py: 144 | 145 | #! /usr/bin/env python 146 | import unittest 147 | class Test(unittest.TestCase): 148 | def test_me(self): 149 | seq = [ 5, 4, 1, 3, 2 ] 150 | seq.sort() 151 | self.assertEqual(seq, [1, 2, 3, 4, 5]) 152 | 153 | if __name__ == '__main__': 154 | unittest.main() 155 | 156 | If you run this, you'll see the following output: :: 157 | 158 | . 159 | ---------------------------------------------------------------------- 160 | Ran 1 test in 0.000s 161 | 162 | OK 163 | 164 | Here, ``unittest.main()`` is running through all of the symbols in the 165 | global module namespace and finding out which classes inherit from 166 | ``unittest.TestCase``. 
Then, for each such class, it finds all methods 167 | starting with ``test``, and for each one it instantiates a new object 168 | and runs the function: so, in this case, just: :: 169 | 170 | Test().test_me() 171 | 172 | If any method fails, then the failure output is recorded and presented 173 | at the end, but the rest of the test methods are run irrespective. 174 | 175 | ``unittest`` also includes support for test *fixtures*, which are functions 176 | run before and after each test; the idea is to use them to set up and 177 | tear down the test environment. In the code below, ``setUp`` creates 178 | and shuffles the ``self.seq`` sequence, while ``tearDown`` deletes it. :: 179 | 180 | test_sort2.py: 181 | 182 | #! /usr/bin/env python 183 | import unittest 184 | import random 185 | 186 | class Test(unittest.TestCase): 187 | def setUp(self): 188 | self.seq = range(0, 10) 189 | random.shuffle(self.seq) 190 | 191 | def tearDown(self): 192 | del self.seq 193 | 194 | def test_basic_sort(self): 195 | self.seq.sort() 196 | self.assertEqual(self.seq, range(0, 10)) 197 | 198 | def test_reverse(self): 199 | self.seq.sort() 200 | self.seq.reverse() 201 | self.assertEqual(self.seq, [9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) 202 | 203 | def test_destruct(self): 204 | self.seq.sort() 205 | del self.seq[-1] 206 | self.assertEqual(self.seq, range(0, 9)) 207 | 208 | unittest.main() 209 | 210 | In both of these examples, it's important to realize that an *entirely 211 | new object* is created, and the fixtures run, for each test function. This 212 | lets you write tests that alter or destroy test data without having to 213 | worry about interactions between the code in different tests. 214 | 215 | Testing with nose 216 | ----------------- 217 | 218 | nose is a unit test discovery system that makes writing and organizing 219 | unit tests very easy. I've actually written a whole separate article on 220 | them, so we should go `check that out `__. 221 | 222 | .. (CTB: testing primes?) 
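To give a flavor of nose before moving on: with nose, as with ``unittest``, the heart of a test is just an assertion about expected behavior, but no TestCase subclass or ``main()`` boilerplate is required. Below is a minimal sketch; the function names are invented, and nose would collect ``test_multiply`` automatically from any module whose name starts with ``test``:

```python
# A nose-style test is just a module-level function whose name starts
# with 'test'.
def multiply(a, b):
    return a * b

def test_multiply():
    assert multiply(3, 4) == 12
    assert multiply(0, 99) == 0

# nose would normally discover and call this for us; calling it by hand
# shows that a passing test simply returns without raising AssertionError.
test_multiply()
```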
223 | 224 | Code coverage analysis 225 | ---------------------- 226 | 227 | `figleaf `__ 228 | is a code coverage recording and analysis system that I wrote and 229 | maintain. It's published on PyPI, so you can install it with 230 | easy_install. 231 | 232 | Basic use of figleaf is very easy. If you have a script ``program.py``, 233 | rather than typing :: 234 | 235 | % python program.py 236 | 237 | to run the script, run :: 238 | 239 | % figleaf program.py 240 | 241 | This will transparently and invisibly record coverage to the file 242 | '.figleaf' in the current directory. If you run the program several 243 | times, the coverage will be aggregated. 244 | 245 | To get a coverage report, run 'figleaf2html'. This will produce a 246 | subdirectory ``html/`` that you can view with any Web browser; the 247 | index.html file will contain a summary of the code coverage, along 248 | with links to individual annotated files. In these annotated files, 249 | executed lines are colored green, while lines of code that are not 250 | executed are colored red. Lines that are not considered lines of code 251 | (e.g. docstrings, or comments) are colored black. 252 | 253 | My main use for code coverage analysis is in testing (which is why I 254 | discuss it in this section!). I record the code coverage for my unit 255 | and functional tests, and then examine the output to figure out which 256 | files or libraries to focus on testing next. As I discuss below, it 257 | is relatively easy to achieve 70-80% code coverage by this method. 258 | 259 | When is code coverage most useful? I think it's most useful in the 260 | early and middle stages of testing, when you need to track down code 261 | that is not touched by your tests. However, 100% code coverage by your 262 | tests doesn't guarantee bug-free code: this is because figleaf only measures 263 | line coverage, not branch coverage.
For example, consider this code: :: 264 | 265 | if a.x or a.y: 266 | f() 267 | 268 | If ``a.x`` is True in all your tests, then ``a.y`` will never be 269 | evaluated -- even though ``a`` may not have an attribute ``y``, which 270 | would cause an AttributeError (which would in turn be a bug, if not 271 | properly caught). Python does not record which subclauses of the 272 | ``if`` statement are executed, so without analyzing the structure of 273 | the program there's no simple way to figure it out. 274 | 275 | Here's another buggy example with 100% code coverage: :: 276 | 277 | def f(a): 278 | if a: 279 | a = a.upper() 280 | return a.strip() 281 | 282 | s = f("some string") 283 | 284 | Here, there's an implicit ``else`` after the if statement; the function f() 285 | could be rewritten to this: :: 286 | 287 | def f(a): 288 | if a: 289 | a = a.upper() 290 | else: 291 | pass 292 | return a.strip() 293 | 294 | s = f("some string") 295 | 296 | and the pass statement would show up as "not executed". 297 | 298 | So, bottom line: 100% test coverage is *necessary* for a well-tested 299 | program, because code that is not executed by any test at all is 300 | simply not being tested. However, 100% test coverage is not 301 | *sufficient* to guarantee that your program is free of bugs, as you can 302 | see from some of the examples above. 303 | 304 | Adding tests to an existing project 305 | ----------------------------------- 306 | 307 | This testing discussion should help to convince you that not only 308 | *should* you test, but that there are plenty of tools available to 309 | *help* you test in Python. It may even give you some ideas about how 310 | to start testing new projects. However, retrofitting an *existing* 311 | project with tests is a different, challenging problem -- where do you 312 | start? People are often overwhelmed by the amount of code they've 313 | written in the past. 314 | 315 | I suggest the following approach. 
316 | 317 | First, start by writing a test for each bug as it is discovered. 318 | The procedure is fairly simple: isolate the cause of the bug; write a 319 | test that demonstrates the bug; fix the bug; verify that the test 320 | passes. This has several benefits in the short term: you are fixing 321 | bugs, you're discovering weak points in your software, you're becoming 322 | more familiar with the testing approach, and you can start to think 323 | about commonalities in the fixtures necessary to *support* the tests. 324 | 325 | Next, take out some time -- half a day or so -- and write fixtures and 326 | functional tests for some small chunk of code; if you can, pick a piece 327 | of code that you're planning to clean up or extend. Don't worry about 328 | being exhaustive, but just write tests that target the main point of 329 | the code that you're working on. 330 | 331 | Repeat this a few times. You should start to discover the benefits of 332 | testing at this point, as you increasingly prevent bugs from occurring 333 | in the code that's covered by the tests. You should also start to get 334 | some idea of what fixtures are necessary for your code base. 335 | 336 | Now use code coverage analysis to analyze what code your tests cover, 337 | and what code isn't covered. At this point you can take a targeted 338 | approach and spend some time writing tests aimed directly at uncovered 339 | areas of code. There should now be tests that cover 30-50% of your 340 | code, at least (it's very easy to attain this level of code 341 | coverage!). 342 | 343 | Once you've reached this point, you can either decide to focus on 344 | increasing your code coverage, or (my recommendation) you can simply 345 | continue incrementally constraining your code by writing tests for bugs 346 | and new features.
Assuming you have a fairly normal code churn, you should 347 | get to the point of 70-80% coverage within a few months to a few years 348 | (depending on the size of the project!). 349 | 350 | This approach is effective because at each stage you get immediate 351 | feedback from your efforts, and it's easier to justify to managers 352 | than a whole-team effort to add testing. Plus, if you're unfamiliar 353 | with testing or with parts of the code base, it gives you time to adjust 354 | and adapt your approach to the needs of the particular project. 355 | 356 | Two articles that discuss similar approaches in some detail are 357 | available online: `Strangling Legacy Code 358 | `__, and `Growing 359 | Your Test Harness 360 | `__. I 361 | can also recommend the book `Working Effectively with Legacy Code 362 | `__, 363 | by Michael Feathers. 364 | 365 | Concluding thoughts on automated testing 366 | ---------------------------------------- 367 | 368 | Starting to do automated testing of your code can lead to immense 369 | savings in maintenance and can also increase productivity 370 | dramatically. There are a number of reasons why automated testing can 371 | help so much, including quick discovery of regressions, increased 372 | design awareness due to more interaction with the code, and early 373 | detection of simple bugs as well as unwanted epistatic interactions 374 | between code modules. The single biggest improvement for me has been 375 | the ability to refactor code without worrying as much about breakage. 376 | In my personal experience, automated testing is a 5-10x productivity 377 | booster when working alone, and it can save multi-person teams from 378 | potentially disastrous errors in communication. 379 | 380 | Automated testing is not, of course, a silver bullet. There are several 381 | common worries.
382 | 383 | One worry is that by increasing the total amount of code in a project, 384 | you increase both the development time and the potential for bugs and 385 | maintenance problems. This is certainly possible, but test code is 386 | very different from regular project code: it can be removed much more 387 | easily (which can be done whenever the code being tested undergoes 388 | revision), and it should be *much* simpler even if it is in fact 389 | bulkier. 390 | 391 | Another worry is that too much of a focus on testing will decrease the 392 | drive for new functionality, because people will focus more on writing 393 | tests than they will on the new code. While this is partly a 394 | managerial issue, it is worth pointing out that the process of 395 | writing new code will be dramatically faster if you don't have to 396 | worry about old code breaking in unexpected ways as you add 397 | functionality. 398 | 399 | A third worry is that by focusing on automation, you will miss bugs in 400 | code that is difficult to automate. There are two considerations 401 | here. First, it is possible to automate quite a bit of testing; the 402 | decision not to automate a particular test is almost always made 403 | because of financial or time considerations rather than technical 404 | limitations. And, second, automated testing is simply not a 405 | replacement for certain types of manual testing -- in particular, 406 | exploratory testing, in which the programmers or users interact with 407 | the program, will always turn up new bugs, and is worth doing 408 | independent of the automated tests. 409 | 410 | How much to test, and what to test, are decisions that need to be made 411 | on an individual project basis; there are no hard and fast rules. 412 | However, I feel confident in saying that some automated testing will 413 | always improve the quality of your code and result in maintenance 414 | improvements.
415 | -------------------------------------------------------------------------------- /publish/tools.txt: -------------------------------------------------------------------------------- 1 | Tools to Help You Work 2 | ====================== 3 | 4 | IPython 5 | ------- 6 | 7 | `IPython `__ is an interactive 8 | interpreter that aims to be a very convenient shell for working with 9 | Python. 10 | 11 | Features of note: 12 | 13 | - Tab completion 14 | - ? and ?? help 15 | - history 16 | - CTRL-P search (in addition to standard CTRL-R/emacs) 17 | - use an editor to write stuff, and export stuff into an editor 18 | - colored exception tracebacks 19 | - automatic function/parameter call stuff 20 | - auto-quoting with ',' 21 | - 'run' (similar to execfile) but with -n, -i 22 | 23 | See `Quick tips `__ for 24 | even more of a laundry list! 25 | 26 | screen and VNC 27 | -------------- 28 | 29 | screen is a non-graphical tool for running multiple text windows in a single 30 | login session. 31 | 32 | Features: 33 | 34 | - multiple windows w/hotkey switching 35 | - copy/paste between windows 36 | - detach/resume 37 | 38 | VNC is a (free) graphical tool for persistent X Windows sessions (and 39 | Windows control, too). 40 | 41 | To start: :: 42 | 43 | % vncserver 44 | 45 | WARNING: Running VNC on an open network is a big security risk!! 46 | 47 | Trac 48 | ---- 49 | 50 | Trac is a really nice-looking and friendly project management Web site. 51 | It integrates a Wiki with a version control repository browser, a 52 | ticket management system, and some simple roadmap controls. 53 | 54 | In particular, you can: 55 | 56 | - browse the source code repository 57 | - create tickets 58 | - link checkin comments to specific tickets, revisions, etc. 59 | - customize components, permissions, roadmaps, etc. 60 | - view project status 61 | 62 | It integrates well with subversion, which is "a better CVS".
63 | -------------------------------------------------------------------------------- /publish/useful-packages.txt: -------------------------------------------------------------------------------- 1 | Useful Packages 2 | =============== 3 | 4 | subprocess 5 | ---------- 6 | 7 | 'subprocess' is a new addition (Python 2.4), and it provides a convenient 8 | and powerful way to run system commands. (...and you should use it instead 9 | of os.system, commands.getstatusoutput, or any of the Popen modules). 10 | 11 | Unfortunately subprocess is a bit hard to use at the moment; I'm hoping 12 | to help fix that for Python 2.6, but in the meantime here are some basic 13 | commands. 14 | 15 | Let's just try running a system command and retrieving the output: 16 | 17 | >>> import subprocess 18 | >>> p = subprocess.Popen(['/bin/echo', 'hello, world'], stdout=subprocess.PIPE) 19 | >>> (stdout, stderr) = p.communicate() 20 | >>> print stdout, 21 | hello, world 22 | 23 | What's going on is that we're starting a subprocess (running 24 | '/bin/echo hello, world') and then asking for all of the output 25 | aggregated together. 26 | 27 | We could, for short strings, read directly from p.stdout (which is a file 28 | handle): 29 | 30 | >>> p = subprocess.Popen(['/bin/echo', 'hello, world'], stdout=subprocess.PIPE) 31 | >>> print p.stdout.read(), 32 | hello, world 33 | 34 | but you could run into trouble here if the command returns a lot of data; 35 | you should use communicate to get the output instead.
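One related detail worth knowing (a sketch using only the documented Popen attributes; '/bin/echo' is assumed to exist, as in the examples above): once ``communicate()`` returns, the child process has exited, and its exit status is available as ``p.returncode`` -- worth checking before you trust the output.

```python
import subprocess

p = subprocess.Popen(['/bin/echo', 'hello, world'],
                     stdout=subprocess.PIPE)
(stdout, stderr) = p.communicate()

# communicate() waits for the child to finish, so returncode is now
# set; by UNIX convention, 0 means the command succeeded.
assert p.returncode == 0
```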
36 | 37 | Let's do something a bit more complicated, just to show you that it's 38 | possible: we're going to write to 'cat' (which is basically an echo chamber): 39 | 40 | >>> from subprocess import PIPE 41 | >>> p = subprocess.Popen(["/bin/cat"], stdin=PIPE, stdout=PIPE) 42 | >>> (stdout, stderr) = p.communicate('hello, world') 43 | >>> print stdout, 44 | hello, world 45 | 46 | There are a number of more complicated things you can do with subprocess -- 47 | like interact with the stdin and stdout of other processes -- but they 48 | are fraught with peril. 49 | 50 | rpy 51 | --- 52 | 53 | `rpy `__ is a Python interface to R that lets R and 54 | Python talk naturally. For those of you who have never used R, it's 55 | a very nice package that's mainly used for statistics, and it has *tons* 56 | of libraries. 57 | 58 | To use rpy, just :: 59 | 60 | from rpy import * 61 | 62 | The most important symbol that will be imported is 'r', which lets you 63 | run arbitrary R commands: :: 64 | 65 | r("command") 66 | 67 | For example, if you wanted to run a principal component analysis, you could 68 | do it like so: :: 69 | 70 | from rpy import * 71 | 72 | def plot_pca(filename): 73 | r("""data <- read.delim('%s', header=FALSE, sep=" ", nrows=5000)""" \ 74 | % (filename,)) 75 | 76 | r("""pca <- prcomp(data, scale=FALSE, center=FALSE)""") 77 | r("""pairs(pca$x[,1:3], pch=20)""") 78 | 79 | plot_pca('vectors.txt') 80 | 81 | Now, the problem with this code is that I'm really just using Python to 82 | drive R, which seems inefficient. You *can* go access the data directly 83 | if you want; I'm just using R's loading features directly because they're 84 | faster. For example, 85 | 86 | x = r.pca['x'] 87 | 88 | is equivalent to 'x <- pca$x'. 89 | 90 | matplotlib 91 | ---------- 92 | 93 | `matplotlib `__ is a plotting package that 94 | aims to make "simple things easy, and hard things possible". It's got 95 | a fair amount of matlab compatibility if you're into that.
96 | 97 | Simple example: :: 98 | 99 | from pylab import hist, show 100 | x = [ i**2 for i in range(0, 500) ] 101 | hist(x, 100) 102 | show() 103 | 104 | 105 | 106 | .. numpy/scipy 107 | .. matplotlib 108 | -------------------------------------------------------------------------------- /run-doctests.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | import doctest 3 | import sys 4 | 5 | for filename in sys.argv[1:]: 6 | print '... running doctests on', filename 7 | doctest.testfile(filename) 8 | 9 | print '*** SUCCESS ***' 10 | -------------------------------------------------------------------------------- /web-presentation.txt: -------------------------------------------------------------------------------- 1 | Using the Web to Present Data 2 | ============================= 3 | 4 | Static HTML 5 | ----------- 6 | 7 | CGI scripts, dynamic data presentation, and simple form processing 8 | ------------------------------------------------------------------ 9 | 10 | WSGI 11 | ---- 12 | 13 | 14 | 15 | An introduction to CherryPy 16 | --------------------------- 17 | 18 | (note security issues!) 19 | 20 | Storing data in pickled object databases 21 | ---------------------------------------- 22 | 23 | shelve, bsddb 24 | 25 | Storing data in Durus, an object database 26 | ----------------------------------------- 27 | 28 | durus 29 | 30 | SQL databases 31 | ------------- 32 | 33 | mention zodb, sqlite, postgresql, mysql; quick examples of postgresql? 34 | mention sqlalchemy, too.
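As a down payment on the quick examples mentioned above, here is a minimal sketch using the stdlib ``sqlite3`` module (new in Python 2.5); the table and column names are invented:

```python
import sqlite3

# ':memory:' gives a throwaway in-memory database -- nothing touches disk.
conn = sqlite3.connect(':memory:')
c = conn.cursor()
c.execute('CREATE TABLE results (name TEXT, score REAL)')
c.execute('INSERT INTO results VALUES (?, ?)', ('trial-1', 0.75))
conn.commit()

# '?' placeholders let the driver quote values safely.
c.execute('SELECT score FROM results WHERE name = ?', ('trial-1',))
row = c.fetchone()
assert row == (0.75,)
conn.close()
```

The DB-API interface is broadly the same across the database drivers mentioned above, though the placeholder syntax varies between them.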
35 | 36 | Database-backed Web programming 37 | ------------------------------- 38 | 39 | Remote procedure calls over the Web with XML-RPC 40 | ------------------------------------------------ 41 | -------------------------------------------------------------------------------- /xml-parsing.txt: -------------------------------------------------------------------------------- 1 | XML parsing, the DOM, and ElementTree 2 | ===================================== 3 | 4 | See http://www.xml.com/pub/a/2003/02/12/py-xml.html 5 | --------------------------------------------------------------------------------