├── .gitignore ├── 00-cover.tex ├── 00-preface.tex ├── 01-intro.tex ├── 02-variables.tex ├── 03-conditional.tex ├── 04-functions.tex ├── 05-iterations.tex ├── 06-strings.tex ├── 07-files.tex ├── 08-lists.tex ├── 09-dictionaries.tex ├── 10-tuples.tex ├── 11-regex.tex ├── 12-network.tex ├── 13-web.tex ├── 14-database.tex ├── 15-viz.tex ├── 16-tasks.tex ├── AA-windows.tex ├── AB-apple.tex ├── AC-contrib.tex ├── AD-copyright.tex ├── BeautifulSoup.py ├── CALIBRE.md ├── README.md ├── archive ├── book.tex ├── cfbook.tex ├── extra.tex ├── extra2.tex ├── objectchaps.tex ├── think-2008-downey.pdf └── think-pre-2008-three.pdf ├── book.pdf ├── book.sh ├── book.tex ├── book_009.pdf ├── book_270.epub ├── book_270.mobi ├── book_270.pdf ├── code ├── AA_NOTE.txt ├── BeautifulSoup.py ├── argfile.py ├── argtest.py ├── avelist.py ├── avenum.py ├── average.py ├── celsius.py ├── cleanup.sh ├── copytildone.py ├── count1.py ├── count2.py ├── count3.py ├── curl1.py ├── curl2.py ├── curl3.py ├── db1.py ├── db2.py ├── egg.py ├── fahren.py ├── geodata │ ├── README.txt │ ├── geodump.py │ ├── geoload.py │ ├── where.data │ ├── where.html │ └── where.js ├── geojson.py ├── geoxml.py ├── gmane │ ├── README.txt │ ├── d3.layout.cloud.js │ ├── d3.v2.js │ ├── gbasic.py │ ├── gline.htm │ ├── gline.py │ ├── gmane.py │ ├── gmodel.py │ ├── gword.htm │ └── gword.py ├── grade.py ├── graphics │ ├── graphics.py │ └── histogram.py ├── greet.py ├── grep.py ├── hidden.py ├── json1.py ├── json2.py ├── largest.py ├── mailcount.py ├── mailtop.py ├── mbox-short.txt ├── mbox.txt ├── oauth.py ├── old-twitter │ ├── twdump.py │ ├── twfriends.py │ ├── twitter1.py │ ├── twitter2.py │ ├── twitter3.py │ ├── twitter4.py │ ├── twjoin.py │ └── twspider.py ├── open.py ├── pagerank │ ├── BeautifulSoup.py │ ├── LICENSE │ ├── README.txt │ ├── d3.v2.js │ ├── force.css │ ├── force.html │ ├── force.js │ ├── spdump.py │ ├── spider.js │ ├── spider.py │ ├── spjson.py │ ├── sprank.py │ └── spreset.py ├── pals.py ├── pay.py ├── pay2.py ├── 
pay3.py ├── re01.py ├── re02.py ├── re03.py ├── re04.py ├── re05.py ├── re06.py ├── re07.py ├── re08.py ├── re09.py ├── re10.py ├── re11.py ├── re12.py ├── re13.py ├── re14.py ├── romeo-full.txt ├── romeo.txt ├── search1.py ├── search10.py ├── search2.py ├── search3.py ├── search4.py ├── search5.py ├── search6.py ├── search7.py ├── search8.py ├── search9.py ├── sequence.py ├── socket1.py ├── socket2.py ├── soft.py ├── spamave.py ├── twdump.py ├── twfriends.py ├── twitter1.py ├── twitter2.py ├── twjoin.py ├── twspider.py ├── twurl.py ├── txtcheck.py ├── txtcheck2.py ├── txtcheck3.py ├── txtcount.py ├── txtdelete.py ├── txtmd5.py ├── txtsize.py ├── urljpeg.py ├── urllib1.py ├── urllib2.py ├── urllink2.py ├── urllink3.py ├── urllinks.py ├── urlregex.py ├── urlwords.py ├── whathour.py ├── wikidata.db ├── wikigrade.py ├── wordlist.py ├── words.py ├── words.txt ├── xml1.py └── xml2.py ├── cover ├── JavaForumFragmentWhite.jpg ├── UM1.svg ├── backtext.docx ├── backtext_es.rtf ├── backtext_ko.docx ├── cover.eps ├── cover.fig ├── cover_um_lib_003_front.jpg ├── cover_um_lib_003_full.jpg ├── cover_um_lib_003_small.jpg ├── epub_cover.jpg ├── greedyalloc.jpg ├── isbn.txt ├── network-um-nsf-9-99-10-09-nodes.eps ├── network-um-nsf-9-99-10-09-nodes.svg ├── network-um-nsf-9-99-10-09.eps ├── network-um-nsf-9-99-10-09.svg └── politicalblogs.jpg ├── createspace.sty ├── createspace ├── BookCoverPreview.jpeg ├── BookCoverPreviewFront.jpg ├── Python para informaticos_ Explo - Charles Severance.epub ├── book-2017-05-15.pdf ├── book_272_es.pdf ├── book_272_es2.pdf └── book_272_es3.pdf ├── figs2 ├── arch.eps ├── arch.fig ├── arch2.eps ├── arch2.fig ├── arch3.eps ├── arch3.fig ├── by-sa.eps ├── by-sa.fig ├── by-sa.png ├── cursor.eps ├── cursor.svg ├── elif.eps ├── elif.svg ├── google-map.eps ├── google-map.png ├── handle.eps ├── handle.svg ├── if-else.eps ├── if-else.svg ├── if.eps ├── if.svg ├── join.eps ├── join.svg ├── list1.eps ├── list1.fig ├── mailorg.eps ├── mailorg.png ├── nested.eps 
├── nested.svg ├── pagerank.eps ├── pagerank.png ├── pda.eps ├── pda.fig ├── pda2.eps ├── pda2.fig ├── relational.eps ├── relational.svg ├── soa.eps ├── soa.svg ├── socket.eps ├── socket.svg ├── sqlite.eps ├── sqlite.png ├── string.eps ├── string.svg ├── tracks.eps ├── tracks.svg ├── twitter.eps ├── twitter.svg ├── wordcloud.eps ├── wordcloud.png ├── xml-tree.eps └── xml-tree.svg ├── fixhtml.py ├── hevea.sty ├── html-snap ├── cfbook.html ├── cfbook001.html ├── cfbook001.png ├── cfbook002.html ├── cfbook002.png ├── cfbook003.html ├── cfbook003.png ├── cfbook004.html ├── cfbook004.png ├── cfbook005.html ├── cfbook005.png ├── cfbook006.html ├── cfbook006.png ├── cfbook007.html ├── cfbook007.png ├── cfbook008.html ├── cfbook008.png ├── cfbook009.html ├── cfbook009.png ├── cfbook010.html ├── cfbook010.png ├── cfbook011.html ├── cfbook011.png ├── cfbook012.html ├── cfbook012.png ├── cfbook013.html ├── cfbook013.png ├── cfbook014.html ├── cfbook014.png ├── cfbook015.html ├── cfbook015.png ├── cfbook016.html ├── cfbook016.png ├── cfbook017.html ├── cfbook017.png ├── cfbook018.html ├── cfbook018.png ├── cfbook019.html ├── cfbook019.png ├── cfbook020.html ├── cfbook020.png ├── cfbook021.html ├── cfbook021.png ├── cfbook022.html ├── cfbook022.png ├── cfbook023.png ├── cfbook024.png ├── contents_motif.gif ├── index.html ├── next_motif.gif └── previous_motif.gif ├── html.sh ├── html_270.zip ├── htmlonly ├── latexonly ├── notes └── allen-downey-permission.txt ├── png.hva ├── spanish.hva ├── translations └── KO │ └── book_009_ko.pdf └── typos.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.sqlite 3 | *.pyc 4 | *.zip 5 | code 6 | 7 | -------------------------------------------------------------------------------- /00-cover.tex: -------------------------------------------------------------------------------- 1 | % The contents of this file is 2 | % Copyright (c) 2009- Charles R. 
Severance, All Righs Reserved 3 | 4 | % LATEXONLY 5 | 6 | \input{latexonly} 7 | 8 | \newtheorem{ex}{Exercise}[chapter] 9 | 10 | \begin{latexonly} 11 | 12 | \renewcommand{\blankpage}{\thispagestyle{empty} \quad \newpage} 13 | 14 | \thispagestyle{empty} 15 | 16 | \begin{flushright} 17 | \vspace*{2.0in} 18 | 19 | \begin{spacing}{3} 20 | {\huge Python for Informatics}\\ 21 | {\Large Exploring Information} 22 | \end{spacing} 23 | 24 | \vspace{0.25in} 25 | 26 | Version \theversion 27 | 28 | \vspace{0.5in} 29 | 30 | 31 | {\Large 32 | Charles Severance\\ 33 | } 34 | 35 | \vfill 36 | 37 | \end{flushright} 38 | 39 | %--copyright-------------------------------------------------- 40 | \pagebreak 41 | \thispagestyle{empty} 42 | 43 | {\small 44 | Copyright \copyright ~2009- Charles Severance. 45 | 46 | 47 | Printing history: 48 | 49 | \begin{description} 50 | 51 | \item[May 2015:] Editorial pass thanks to Sue Blumenberg. 52 | 53 | \item[October 2013:] Major revision to Chapters 13 and 14 54 | to switch to JSON and use OAuth. 55 | Added new chapter on Visualization. 56 | 57 | \item[September 2013:] Published book on Amazon CreateSpace 58 | 59 | \item[January 2010:] Published book using the University of 60 | Michigan Espresso Book machine. 61 | 62 | \item[December 2009:] Major revision to chapters 2-10 from 63 | \emph{Think Python: How to Think Like 64 | a Computer Scientist} 65 | and writing chapters 1 and 11-15 to 66 | produce 67 | \emph{Python for Informatics: Exploring Information} 68 | 69 | \item[June 2008:] Major revision, changed title to 70 | \emph{Think Python: How to Think Like 71 | a Computer Scientist}. 72 | 73 | \item[August 2007:] Major revision, changed title to 74 | \emph{How to Think Like a (Python) Programmer}. 75 | 76 | \item[April 2002:] First edition of \emph{How to Think Like 77 | a Computer Scientist}. 
78 | 79 | \end{description} 80 | 81 | \vspace{0.2in} 82 | 83 | This work is licensed under a 84 | Creative Commons 85 | Attribution-NonCommercial-ShareAlike 3.0 Unported License. 86 | This license is 87 | available at 88 | \url{creativecommons.org/licenses/by-nc-sa/3.0/}. You can 89 | see what the author considers commercial and non-commercial 90 | uses of this material as well as license exemptions 91 | in the Appendix titled Copyright Detail. 92 | 93 | The \LaTeX\ source for the 94 | \emph{Think Python: How to Think Like 95 | a Computer Scientist} 96 | version of this book is available from 97 | \url{http://www.thinkpython.com}. 98 | 99 | \vspace{0.2in} 100 | 101 | } % end small 102 | 103 | \end{latexonly} 104 | 105 | 106 | % HTMLONLY 107 | 108 | \begin{htmlonly} 109 | 110 | % TITLE PAGE FOR HTML VERSION 111 | 112 | {\Large \thetitle} 113 | 114 | {\large 115 | Charles Severance} 116 | 117 | Version \theversion 118 | 119 | \setcounter{chapter}{-1} 120 | 121 | \end{htmlonly} 122 | -------------------------------------------------------------------------------- /00-preface.tex: -------------------------------------------------------------------------------- 1 | % The contents of this file are 2 | % Copyright (c) 2009- Charles R. Severance, All Rights Reserved 3 | 4 | \chapter{Preface} 5 | 6 | \section*{Python for Informatics: Remixing an Open Book} 7 | 8 | It is quite natural for academics who are continuously told to 9 | ``publish or perish'' to want to always create something from scratch 10 | that is their own fresh creation. This book is an 11 | experiment in not starting from scratch, but instead ``remixing'' 12 | the book titled 13 | \emph{Think Python: How to Think Like 14 | a Computer Scientist} 15 | written by Allen B. Downey, Jeff Elkner, and others. 
16 | 17 | In December of 2009, I was preparing to teach 18 | {\bf SI502 - Networked Programming} at the University of Michigan 19 | for the fifth semester in a row and decided it was time 20 | to write a Python textbook that focused on exploring data 21 | instead of understanding algorithms and abstractions. 22 | My goal in SI502 is to teach people lifelong data handling 23 | skills using Python. Few of my 24 | students were planning to be professional 25 | computer programmers. Instead, they 26 | planned to be librarians, managers, lawyers, biologists, economists, etc., 27 | who happened to want to skillfully use technology in their chosen field. 28 | 29 | I never seemed to find the perfect data-oriented Python 30 | book for my course, so I set out 31 | to write just such a book. Luckily at a faculty meeting three weeks 32 | before I was about to start my new book from scratch over 33 | the holiday break, 34 | Dr. Atul Prakash showed me the \emph{Think Python} book which he had 35 | used to teach his Python course that semester. 36 | It is a well-written Computer Science text with a focus on 37 | short, direct explanations and ease of learning. 38 | 39 | The overall book structure 40 | has been changed to get to doing data analysis problems as quickly as 41 | possible and have a series of running examples and exercises 42 | about data analysis from the very beginning. 43 | 44 | Chapters 2--10 are similar to the \emph{Think Python} book, 45 | but there have been major changes. Number-oriented examples and 46 | exercises have been replaced with data-oriented exercises. 47 | Topics are presented in the order needed to build increasingly 48 | sophisticated data analysis solutions. Some topics like {\tt try} and 49 | {\tt except} are pulled forward and presented as part of the chapter 50 | on conditionals. Functions are given very light treatment until 51 | they are needed to handle program complexity rather than introduced 52 | as an early lesson in abstraction. 
Nearly all user-defined functions 53 | have been removed from the example code and exercises outside of Chapter 4. 54 | The word ``recursion''\footnote{Except, of course, for this line.} 55 | does not appear in the book at all. 56 | 57 | In chapters 1 and 11--16, all of the material is brand new, focusing 58 | on real-world uses and simple examples of Python for data analysis 59 | including regular expressions for searching and parsing, 60 | automating tasks on your computer, retrieving data across 61 | the network, scraping web pages for data, 62 | using web services, parsing XML and JSON data, and creating 63 | and using databases using Structured Query Language. 64 | 65 | The ultimate goal of all of these changes is to shift from a 66 | Computer Science to an Informatics 67 | focus and to only include topics in a first technology 68 | class that can be useful even if one chooses not to 69 | become a professional programmer. 70 | 71 | Students who find this book interesting and want to further explore 72 | should look at Allen B. Downey's \emph{Think Python} book. Because there 73 | is a lot of overlap between the two books, 74 | students will quickly pick up skills in the additional 75 | areas of technical programming and algorithmic thinking 76 | that are covered in \emph{Think Python}. 77 | And given that the books have a similar writing style, they should be 78 | able to move quickly through \emph{Think Python} with a minimum of effort. 79 | 80 | \index{Creative Commons License} 81 | \index{CC-BY-SA} 82 | \index{BY-SA} 83 | As the copyright holder of \emph{Think Python}, 84 | Allen has given me permission to change the book's license 85 | on the material from his book that remains in this book 86 | from the 87 | GNU Free Documentation License 88 | to the more recent 89 | Creative Commons Attribution --- Share Alike 90 | license. 91 | This follows a general shift in open documentation licenses moving 92 | from the GFDL to the CC-BY-SA (e.g., Wikipedia). 
93 | Using the CC-BY-SA license maintains the book's 94 | strong copyleft tradition while making it even more straightforward 95 | for new authors to reuse this material as they see fit. 96 | 97 | I feel that this book serves as an example of why open 98 | materials are so important to the future of education, 99 | and want to thank Allen B. Downey and Cambridge University 100 | Press for their forward-looking decision to make the book available 101 | under an open copyright. I hope they are pleased with the 102 | results of my efforts and I hope that you the reader are pleased with 103 | \emph{our} collective efforts. 104 | 105 | I would like to thank Allen B. Downey and Lauren Cowles for their help, 106 | patience, and guidance in dealing with and resolving the copyright 107 | issues around this book. 108 | 109 | Charles Severance\\ 110 | www.dr-chuck.com\\ 111 | Ann Arbor, MI, USA\\ 112 | September 9, 2013 113 | 114 | Charles Severance is a 115 | Clinical Associate Professor 116 | at the University of Michigan School of Information. 117 | 118 | \clearemptydoublepage 119 | 120 | % TABLE OF CONTENTS 121 | \begin{latexonly} 122 | 123 | \tableofcontents 124 | 125 | \clearemptydoublepage 126 | 127 | \end{latexonly} 128 | 129 | % START THE BOOK 130 | \mainmatter 131 | 132 | -------------------------------------------------------------------------------- /AA-windows.tex: -------------------------------------------------------------------------------- 1 | % The contents of this file are 2 | % Copyright (c) 2009- Charles R. Severance, All Rights Reserved 3 | 4 | \chapter{Python Programming on Windows} 5 | 6 | In this appendix, we walk through a series of steps 7 | so you can run Python on Windows. There are many different 8 | approaches you can take, and this is just one 9 | approach to keep things simple. 10 | 11 | First, you need to install a programmer editor. You 12 | do not want to use Notepad or Microsoft Word to edit 13 | Python programs. 
Programs must be in "flat-text" files 14 | and so you need an editor that is good at 15 | editing text files. 16 | 17 | Our recommended editor for Windows is NotePad++ which 18 | can be downloaded and installed from: 19 | 20 | \url{https://notepad-plus-plus.org/} 21 | 22 | Then download a recent version of Python 2 from the 23 | \url{www.python.org} web site. 24 | 25 | \url{https://www.python.org/downloads/} 26 | 27 | Once you have installed Python, you should have a new 28 | folder on your computer like {\tt C:{\textbackslash}Python27}. 29 | 30 | To create a Python program, run NotePad++ from the Start Menu 31 | and save the file with a suffix of ``.py''. For this 32 | exercise, put a folder on your Desktop named 33 | {\tt py4inf}. It is best to keep your folder names short 34 | and not to have any spaces in your folder or file name. 35 | 36 | Let's make our first Python program be: 37 | 38 | \beforeverb 39 | \begin{verbatim} 40 | print 'Hello Chuck' 41 | \end{verbatim} 42 | \afterverb 43 | % 44 | Except that you should change it to be your name. Save the file 45 | into {\tt Desktop{\textbackslash}py4inf{\textbackslash}prog1.py}. 46 | 47 | Then open a command-line window. Different versions of Windows 48 | do this differently: 49 | 50 | \begin{itemize} 51 | \item Windows Vista and Windows 7: Press {\bf Start} 52 | and then in the command search window enter the word 53 | {\tt command} and press enter. 54 | 55 | \item Windows XP: Press {\bf Start}, then {\bf Run}, and 56 | then enter {\tt cmd} in the dialog box and press {\bf OK}. 57 | \end{itemize} 58 | 59 | You will find yourself in a text window with a prompt that 60 | tells you what folder you are currently ``in''. 61 | 62 | Windows Vista and Windows-7: {\tt C:{\textbackslash}Users{\textbackslash}csev}\\ 63 | Windows XP: {\tt C:{\textbackslash}Documents and Settings{\textbackslash}csev} 64 | 65 | This is your ``home directory''. 
Now we need to move into 66 | the folder where you have saved your Python program using 67 | the following commands: 68 | 69 | \beforeverb 70 | \begin{verbatim} 71 | C:\Users\csev> cd Desktop 72 | C:\Users\csev\Desktop> cd py4inf 73 | \end{verbatim} 74 | \afterverb 75 | % 76 | Then type 77 | 78 | \beforeverb 79 | \begin{verbatim} 80 | C:\Users\csev\Desktop\py4inf> dir 81 | \end{verbatim} 82 | \afterverb 83 | % 84 | to list your files. You should see the {\tt prog1.py} when 85 | you type the {\tt dir} command. 86 | 87 | To run your program, simply type the name of your file at the 88 | command prompt and press enter. 89 | 90 | \beforeverb 91 | \begin{verbatim} 92 | C:\Users\csev\Desktop\py4inf> prog1.py 93 | Hello Chuck 94 | C:\Users\csev\Desktop\py4inf> 95 | \end{verbatim} 96 | \afterverb 97 | % 98 | You can edit the file in NotePad++, save it, and then switch back 99 | to the command line and execute the program again by typing 100 | the file name again at the command-line prompt. 101 | 102 | If you get confused in the command-line window, just close it 103 | and open a new one. 104 | 105 | Hint: You can also press the ``up arrow'' at the command line to 106 | scroll back and run a previously entered command again. 107 | 108 | You should also look in the preferences for NotePad++ and set it 109 | to expand tab characters to be four spaces. This will save you lots 110 | of effort looking for indentation errors. 111 | 112 | You can also find further information on editing and running 113 | Python programs at \url{www.py4inf.com}. 114 | 115 | -------------------------------------------------------------------------------- /AB-apple.tex: -------------------------------------------------------------------------------- 1 | % The contents of this file are 2 | % Copyright (c) 2009- Charles R. Severance, All Rights Reserved 3 | 4 | \chapter{Python Programming on Macintosh} 5 | 6 | In this appendix, we walk through a series of steps 7 | so you can run Python on Macintosh. 
Since Python is 8 | already included in the Macintosh Operating system, we only 9 | need to learn how to edit Python files and run Python programs 10 | in the terminal window. 11 | 12 | There are many approaches you can take to editing and running 13 | Python programs, and this is just one approach we have found 14 | to be very simple. 15 | 16 | First, you need to install a programmer editor. You 17 | do not want to use TextEdit or Microsoft Word to edit 18 | Python programs. Programs must be in ``flat-text'' files 19 | and so you need an editor that is good at 20 | editing text files. 21 | 22 | Our recommended editor for Macintosh is TextWrangler which 23 | can be downloaded and installed from: 24 | 25 | \url{http://www.barebones.com/products/TextWrangler/} 26 | 27 | To create a Python program, run 28 | {\bf TextWrangler} from your {\bf Applications} folder. 29 | 30 | Let's make our first Python program be: 31 | 32 | \beforeverb 33 | \begin{verbatim} 34 | print 'Hello Chuck' 35 | \end{verbatim} 36 | \afterverb 37 | % 38 | Except that you should change it to be your name. 39 | Save the file in a folder on your Desktop named 40 | {\tt py4inf}. It is best to keep your folder names short 41 | and not to have any spaces in your folder or file name. 42 | Once you have made the folder, save the file 43 | into {\tt Desktop/py4inf/prog1.py}. 44 | 45 | Then run the {\bf Terminal} program. The easiest way is to 46 | press the Spotlight icon (the magnifying glass) in the upper 47 | right of your screen, enter ``terminal'', and launch the 48 | application that comes up. 49 | 50 | You start in your ``home directory''. You can see the current 51 | directory by typing the {\tt pwd} command in the terminal window. 
52 | 53 | \beforeverb 54 | \begin{verbatim} 55 | 67-194-80-15:~ csev$ pwd 56 | /Users/csev 57 | 67-194-80-15:~ csev$ 58 | \end{verbatim} 59 | \afterverb 60 | % 61 | You must be in the folder that contains your Python program 62 | to run the program. Use the {\tt cd} command to move to a new 63 | folder and then the {\tt ls} command to list the files in the 64 | folder. 65 | 66 | \beforeverb 67 | \begin{verbatim} 68 | 67-194-80-15:~ csev$ cd Desktop 69 | 67-194-80-15:Desktop csev$ cd py4inf 70 | 67-194-80-15:py4inf csev$ ls 71 | prog1.py 72 | 67-194-80-15:py4inf csev$ 73 | \end{verbatim} 74 | \afterverb 75 | % 76 | To run your program, simply type the {\tt python} command followed 77 | by the name of your file at the command prompt and press enter. 78 | 79 | \beforeverb 80 | \begin{verbatim} 81 | 67-194-80-15:py4inf csev$ python prog1.py 82 | Hello Chuck 83 | 67-194-80-15:py4inf csev$ 84 | \end{verbatim} 85 | \afterverb 86 | % 87 | You can edit the file in TextWrangler, save it, and then switch back 88 | to the command line and execute the program again by typing 89 | the same command again at the command-line prompt. 90 | 91 | If you get confused in the command-line window, just close it 92 | and open a new one. 93 | 94 | Hint: You can also press the ``up-arrow'' in the command line to 95 | scroll back and run a previously entered command again. 96 | 97 | You should also look in the preferences for TextWrangler and set it 98 | to expand tab characters to be four spaces. It will save you lots 99 | of effort looking for indentation errors. 100 | 101 | You can also find further information on editing and running 102 | Python programs at \url{www.py4inf.com}. 103 | 104 | 105 | -------------------------------------------------------------------------------- /AD-copyright.tex: -------------------------------------------------------------------------------- 1 | % The contents of this file are 2 | % Copyright (c) 2009- Charles R. 
Severance, All Rights Reserved 3 | 4 | \chapter{Copyright Detail} 5 | 6 | This work is licensed under a 7 | Creative Commons 8 | Attribution-NonCommercial-ShareAlike 3.0 Unported License. 9 | This license is 10 | available at 11 | \url{creativecommons.org/licenses/by-nc-sa/3.0/}. 12 | 13 | I would have preferred to license the book under the less 14 | restrictive CC-BY-SA license. But unfortunately there are 15 | a few unscrupulous 16 | organizations who search for and find freely licensed books, 17 | and then publish and sell virtually unchanged copies of the books on a 18 | print-on-demand service such as LuLu or CreateSpace. CreateSpace 19 | has (thankfully) added a policy that gives the wishes of the actual 20 | copyright holder preference over a non-copyright holder attempting 21 | to publish a freely licensed work. Unfortunately there are many 22 | print-on-demand services and very few have as well-considered a policy 23 | as CreateSpace. 24 | 25 | Regretfully, I added the NC element to the license 26 | of this book to give me recourse in case someone tries to clone this 27 | book and sell it commercially. Unfortunately, adding NC limits uses 28 | of this material that I would like to permit. So I have added this 29 | section of the document to describe specific situations where 30 | I am giving my permission in advance to use the material in this book 31 | in situations that some might consider commercial. 32 | 33 | \begin{itemize} 34 | \item If you are printing a limited number of copies of all or part of 35 | this book for use in a course (e.g., like a coursepack), then 36 | you are granted a CC-BY license to these materials for that purpose. 37 | 38 | \item If you are a teacher at a university and you translate this book 39 | into a language other than English and teach using the translated book, then 40 | you can contact me and I will grant you a CC-BY-SA 41 | license to these materials with respect to the publication of your 42 | translation. 
In particular, you will be permitted 43 | to sell the resulting translated book commercially. 44 | \end{itemize} 45 | 46 | If you are intending to translate the book, you may want to contact me 47 | so we can make sure that you have all of the related course materials so 48 | you can translate them as well. 49 | 50 | Of course, you are welcome to contact me and ask for permission if these 51 | clauses are not sufficient. In all cases, permission to reuse and 52 | remix this material will be granted as long as there is clear added value 53 | or benefit to students or teachers that will accrue as a result of the 54 | new work. 55 | 56 | Charles Severance\\ 57 | www.dr-chuck.com\\ 58 | Ann Arbor, MI, USA\\ 59 | September 9, 2013 60 | 61 | 62 | 63 | \normalsize 64 | 65 | \printindex 66 | 67 | \clearemptydoublepage 68 | 69 | 70 | \end{document} 71 | -------------------------------------------------------------------------------- /CALIBRE.md: -------------------------------------------------------------------------------- 1 | How to Publish in CALIBRE 2 | ------------------------- 3 | 4 | Make the HTML into a zip file. 5 | 6 | Drag the ZIP file into Calibre 7 | 8 | Convert to EPUB - In the dialog that pops up 9 | - Set title and author 10 | - Set Cover 11 | - Do *not* do heuristic processing 12 | - Under Structure Detection, Blank out the Insert page breaks before regex 13 | 14 | Use Calibre to do the MOBI conversion - just uploading the EPUB seems 15 | to lose KDP awareness of the table of contents. 
16 | 17 | Save To Disk in a Single Directory 18 | 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python for Informatics: Exploring Information 2 | ============================================= 3 | 4 | This is the source code for "Python for Informatics: Exploring Information". 5 | The web site for this book is http://www.pythonlearn.com/ 6 | 7 | LaTeX Files 8 | ----------- 9 | 10 | The source file for the book is *book.tex* - this file includes the 11 | per-chapter files *00-cover.tex* through *AD-copyright.tex* 12 | 13 | Workflow 14 | -------- 15 | 16 | Once you have LaTeX and HeVeA installed properly the workflow is simple. 17 | To produce the PDF version of the book you type: 18 | 19 | bash book.sh 20 | 21 | This leaves the output in *book.pdf* and if you are on a Mac or Linux, it 22 | even attempts to open the PDF viewer for your system. 23 | 24 | To produce the HTML version of the book you type: 25 | 26 | bash html.sh 27 | 28 | This produces files in the *html* folder. This folder contains the book, chapters in 29 | HTML and the images for the book. 30 | 31 | To make EPUB or MOBI files I use the Calibre software. The steps that I take in Calibre 32 | are here: 33 | 34 | * [Importing HTML into Calibre](CALIBRE.md) 35 | 36 | I also have a server that builds the latest version from this repository at this URL: 37 | 38 | * http://do1.dr-chuck.com/py4inf/EN-us/ 39 | 40 | I don't yet have the files that make up the build server checked in because it is 41 | still a bit of a hack. If you want to set up your own build server - I will check 42 | the files in. 43 | 44 | Software Installation - Macintosh 45 | --------------------------------- 46 | 47 | Running the script to produce the PDF is really easy and convenient on the Mac. 
Simply 48 | install this software: 49 | 50 | * https://tug.org/mactex/ 51 | 52 | Make sure to install the extras as well. If you have a recent Mac you **cannot** make 53 | the binary download of *hevea* work as it is a PowerPC binary. If you want to do the HTML 54 | generation, you need a variant of Linux. 55 | 56 | Software Installation - Ubuntu 57 | ------------------------------ 58 | 59 | This is the rough set of steps I use on Ubuntu: 60 | 61 | sudo apt-get install texlive-latex-base 62 | sudo apt-get install texlive-latex-recommended 63 | sudo apt-get install texlive-fonts-recommended 64 | sudo apt-get install texlive-latex-extra 65 | sudo apt-get install hevea 66 | sudo apt-get install imagemagick 67 | sudo apt-get install texlive-fonts-extra 68 | 69 | sudo apt-get install texlive-lang-spanish texlive-doc-es 70 | 71 | You could put them all on one long apt-get, but I like to see if they work :) 72 | 73 | Once this is done, the *book.sh* and *html.sh* should both work just fine. For 74 | my own sanity, I have Parallels with an Ubuntu image that I can use to generate 75 | HTML. It was easier than keeping a four-year-old MacBook running with Rosetta 76 | support. 77 | 78 | Translating This Book 79 | --------------------- 80 | 81 | This book is available with a 82 | Creative Commons 83 | Attribution-NonCommercial-ShareAlike 3.0 Unported License. So as long as you 84 | are not intending to profit from the translation, no permission to translate 85 | and publish is needed. If you want to sell the resulting translated book 86 | commercially, please see the Appendix on Copyright and contact me. 
87 | 88 | Here are some of the translations in progress: 89 | 90 | * Korean - [Formatted Book](http://do1.dr-chuck.com/py4inf/KO-ko/book.pdf) | [Book Source](https://github.com/statkclee/py4inf-kor) (Lead: Victor KC Lee) 91 | * Italian - [Google Doc](https://docs.google.com/document/d/1ZyxzXGe2qGgsc-Dbqs-pXvQFPKbpJfLs1cq2gUFkxqw/edit?usp=sharing) (Lead: Mauro Toselli) 92 | * Spanish - [Formatted Book](http://do1.dr-chuck.com/py4inf/ES-es/) | [Book Source](https://github.com/hedemarrie/py4inf-esp) (Lead: Hedemarrie Dussan) 93 | 94 | Feel free to send me a link (or just edit this page and send me a Pull Request). 95 | 96 | You can use any technology you like: LaTeX, Google Docs, WikiBook or whatever you choose. 97 | 98 | If you can figure out LaTeX, the easiest way to translate the book is to fork 99 | my repo on GitHub and start translating in your own repo. That way it will be easier 100 | to catch up with changes I make to the English version of the book. 101 | 102 | If you start a translation on GitHub, please contact me so I can add it to my automatic 103 | build process: 104 | 105 | * http://do1.dr-chuck.com/py4inf/ 106 | 107 | This way your latest work will be easily found by students and linked from my web site 108 | once the translation is under way. 109 | 110 | TO DO 111 | ----- 112 | 113 | I need to document and check in the code to run a build server. The build server 114 | is another way for a MacBook user without HeVeA to develop. Edit locally, check 115 | the PDF and then check in the changes, wait a tick, and then the HTML is made on 116 | the build server. 117 | 118 | I have no idea how LaTeX works on Windows. I would be happy to get a PR 119 | with some documentation. 
120 | 121 | Chuck Severance - 122 | Mon Aug 18 22:20:12 EDT 2014 123 | 124 | 125 | 126 | 127 | -------------------------------------------------------------------------------- /archive/think-2008-downey.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/archive/think-2008-downey.pdf -------------------------------------------------------------------------------- /archive/think-pre-2008-three.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/archive/think-pre-2008-three.pdf -------------------------------------------------------------------------------- /book.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/book.pdf -------------------------------------------------------------------------------- /book.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | rm *.tmp *.aux 4 | latex book 5 | makeindex book 6 | latex book 7 | dvipdf book.dvi book.pdf 8 | if [[ "$OSTYPE" == "darwin"* ]]; then 9 | open book.pdf 10 | elif [[ "$OSTYPE" == "linux-gnu" && -n "$DISPLAY" ]]; then 11 | xdg-open book.pdf 12 | else 13 | echo "Output on book.pdf" 14 | fi 15 | echo Removed temporary files 16 | rm -f book.aux book.ind book.ilg book.log book.dvi book.idx book.toc book.haux book.hind book.image.tex book.tmp book.idv book.4tc book.lg book.xref 17 | -------------------------------------------------------------------------------- /book.tex: -------------------------------------------------------------------------------- 1 | % The contents of this file is 2 | % Copyright (c) 2009-2011 Charles R. 
Severance, All Rights Reserved 3 | 4 | %\documentclass[10pt,b5paper]{book} 5 | \documentclass[11pt]{book} 6 | % \usepackage[width=5.25in,height=7.50in,hmarginratio=3:2,vmarginratio=1:1]{geometry} 7 | \usepackage[size=journal,gutter=0.75in,trim,bleed]{createspace} 8 | 9 | \usepackage{pslatex} 10 | \usepackage{url} 11 | \usepackage{fancyhdr} 12 | \usepackage{graphicx} 13 | \usepackage{amsmath, amsthm, amssymb} 14 | \usepackage{exercise} 15 | \usepackage{makeidx} 16 | \usepackage{setspace} 17 | \usepackage{hevea} 18 | \usepackage{alltt} 19 | \usepackage{upquote} 20 | 21 | \newcommand{\thetitle}{Python for Informatics: Exploring Information} 22 | \newcommand{\theversion}{2.7.3} 23 | 24 | \makeindex 25 | 26 | \begin{document} 27 | 28 | \frontmatter 29 | 30 | \input{00-cover} 31 | \input{00-preface} 32 | 33 | % START THE BOOK 34 | \mainmatter 35 | 36 | \input{01-intro} 37 | \input{02-variables.tex} 38 | \input{03-conditional.tex} 39 | \input{04-functions.tex} 40 | \input{05-iterations.tex} 41 | \input{06-strings.tex} 42 | \input{07-files.tex} 43 | \input{08-lists.tex} 44 | \input{09-dictionaries.tex} 45 | \input{10-tuples.tex} 46 | \input{11-regex} 47 | \input{12-network} 48 | \input{13-web} 49 | \input{14-database} 50 | \input{15-viz} 51 | \input{16-tasks} 52 | 53 | \appendix 54 | 55 | \input{AA-windows} 56 | \input{AB-apple} 57 | \input{AC-contrib} 58 | \input{AD-copyright} 59 | 60 | \normalsize 61 | 62 | \printindex 63 | 64 | \clearemptydoublepage 65 | 66 | \end{document} 67 | -------------------------------------------------------------------------------- /book_009.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/book_009.pdf -------------------------------------------------------------------------------- /book_270.epub: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/book_270.epub -------------------------------------------------------------------------------- /book_270.mobi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/book_270.mobi -------------------------------------------------------------------------------- /book_270.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/book_270.pdf -------------------------------------------------------------------------------- /code/AA_NOTE.txt: -------------------------------------------------------------------------------- 1 | Note that the real copies of this stuff is in 2 | the pythonlearn github 3 | 4 | -------------------------------------------------------------------------------- /code/argfile.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | name = sys.argv[1] 4 | handle = open(name, 'r') 5 | text = handle.read() 6 | print name, 'is', len(text), 'bytes' 7 | -------------------------------------------------------------------------------- /code/argtest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | print 'Count:', len(sys.argv) 4 | print 'Type:', type(sys.argv) 5 | 6 | for arg in sys.argv: 7 | print 'Argument:', arg 8 | 9 | -------------------------------------------------------------------------------- /code/avelist.py: -------------------------------------------------------------------------------- 1 | numlist = list() 2 | while ( True ) : 3 | inp = raw_input('Enter a number: ') 4 | if inp == 'done' : break 5 | value = float(inp) 6 | numlist.append(value) 7 | 8 | average = sum(numlist) / len(numlist) 9 | print 'Average:', average 10 | 
-------------------------------------------------------------------------------- /code/avenum.py: -------------------------------------------------------------------------------- 1 | total = 0 2 | count = 0 3 | while ( True ) : 4 | inp = raw_input('Enter a number: ') 5 | if inp == 'done' : break 6 | value = float(inp) 7 | total = total + value 8 | count = count + 1 9 | 10 | average = total / count 11 | print 'Average:', average 12 | -------------------------------------------------------------------------------- /code/average.py: -------------------------------------------------------------------------------- 1 | total = 0 2 | count = 0 3 | while ( True ) : 4 | inp = raw_input('Enter a number: ') 5 | if inp == 'done' : 6 | break 7 | try: 8 | value = float(inp) 9 | except: 10 | print 'Invalid input' 11 | continue 12 | total = total + value 13 | count = count + 1 14 | 15 | average = total / count 16 | print 'Average:', average 17 | -------------------------------------------------------------------------------- /code/celsius.py: -------------------------------------------------------------------------------- 1 | inp = raw_input('Enter Celsius Temperature:') 2 | cel = float(inp) 3 | fahr = ( cel * 9.0 ) / 5.0 + 32.0 4 | print fahr 5 | -------------------------------------------------------------------------------- /code/cleanup.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | rm *.pyc */*.pyc 4 | rm *.sqlite */*.sqlite 5 | 6 | zip -r geodata.zip geodata 7 | zip -r gmane.zip gmane 8 | zip -r pagerank.zip pagerank 9 | 10 | -------------------------------------------------------------------------------- /code/copytildone.py: -------------------------------------------------------------------------------- 1 | while True: 2 | line = raw_input('> ') 3 | if line[0] == '#' : 4 | continue 5 | if line == 'done': 6 | break 7 | print line 8 | 9 | print 'Done!' 
10 | 11 | -------------------------------------------------------------------------------- /code/count1.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter the file name: ') 2 | try: 3 | fhand = open(fname) 4 | except: 5 | print 'File cannot be opened:', fname 6 | exit() 7 | 8 | counts = dict() 9 | for line in fhand: 10 | words = line.split() 11 | for word in words: 12 | if word not in counts: 13 | counts[word] = 1 14 | else: 15 | counts[word] += 1 16 | 17 | print counts 18 | -------------------------------------------------------------------------------- /code/count2.py: -------------------------------------------------------------------------------- 1 | import string 2 | 3 | fname = raw_input('Enter the file name: ') 4 | try: 5 | fhand = open(fname) 6 | except: 7 | print 'File cannot be opened:', fname 8 | exit() 9 | 10 | counts = dict() 11 | for line in fhand: 12 | line = line.translate(None, string.punctuation) 13 | line = line.lower() 14 | words = line.split() 15 | for word in words: 16 | if word not in counts: 17 | counts[word] = 1 18 | else: 19 | counts[word] += 1 20 | 21 | print counts 22 | -------------------------------------------------------------------------------- /code/count3.py: -------------------------------------------------------------------------------- 1 | import string 2 | fhand = open('romeo-full.txt') 3 | counts = dict() 4 | for line in fhand: 5 | line = line.translate(None, string.punctuation) 6 | line = line.lower() 7 | words = line.split() 8 | for word in words: 9 | if word not in counts: 10 | counts[word] = 1 11 | else: 12 | counts[word] += 1 13 | 14 | # Sort the dictionary by value 15 | lst = list() 16 | for key, val in counts.items(): 17 | lst.append( (val, key) ) 18 | 19 | lst.sort(reverse=True) 20 | 21 | for key, val in lst[:10] : 22 | print key, val 23 | -------------------------------------------------------------------------------- /code/curl1.py: 
-------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | img = urllib.urlopen('http://www.py4inf.com/cover.jpg').read() 4 | fhand = open('cover.jpg', 'w') 5 | fhand.write(img) 6 | fhand.close() 7 | -------------------------------------------------------------------------------- /code/curl2.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | img = urllib.urlopen('http://www.py4inf.com/cover.jpg') 4 | fhand = open('cover.jpg', 'w') 5 | size = 0 6 | while True: 7 | info = img.read(100000) 8 | if len(info) < 1 : break 9 | size = size + len(info) 10 | fhand.write(info) 11 | 12 | print size,'characters copied.' 13 | fhand.close() 14 | -------------------------------------------------------------------------------- /code/curl3.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | 4 | print 'Please enter a URL like http://www.py4inf.com/cover.jpg' 5 | urlstr = raw_input().strip() 6 | img = urllib.urlopen(urlstr) 7 | 8 | # Get the last "word" 9 | words = urlstr.split('/') 10 | fname = words[-1] 11 | 12 | # Don't overwrite the file 13 | if os.path.exists(fname) : 14 | if raw_input('Replace '+fname+' (Y/n)?') != 'Y' : 15 | print 'Data not copied' 16 | exit() 17 | print 'Replacing',fname 18 | 19 | fhand = open(fname, 'w') 20 | size = 0 21 | while True: 22 | info = img.read(100000) 23 | if len(info) < 1 : break 24 | size = size + len(info) 25 | fhand.write(info) 26 | 27 | print size,'characters copied to',fname 28 | fhand.close() 29 | -------------------------------------------------------------------------------- /code/db1.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('music.sqlite') 4 | cur = conn.cursor() 5 | 6 | cur.execute('DROP TABLE IF EXISTS Tracks ') 7 | cur.execute('CREATE TABLE Tracks (title TEXT, plays 
INTEGER)') 8 | 9 | conn.close() 10 | 11 | -------------------------------------------------------------------------------- /code/db2.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('music.sqlite') 4 | cur = conn.cursor() 5 | 6 | cur.execute('INSERT INTO Tracks (title, plays) VALUES ( ?, ? )', 7 | ( 'Thunderstruck', 20 ) ) 8 | cur.execute('INSERT INTO Tracks (title, plays) VALUES ( ?, ? )', 9 | ( 'My Way', 15 ) ) 10 | conn.commit() 11 | 12 | print 'Tracks:' 13 | cur.execute('SELECT title, plays FROM Tracks') 14 | for row in cur : 15 | print row 16 | 17 | cur.execute('DELETE FROM Tracks WHERE plays < 100') 18 | 19 | cur.close() 20 | 21 | -------------------------------------------------------------------------------- /code/egg.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter the file name: ') 2 | if fname == 'na na boo boo' : 3 | print 'NA NA BOO BOO TO YOU - You have been punkd!' 
4 | exit() 5 | 6 | try: 7 | fhand = open(fname) 8 | except: 9 | print 'File cannot be opened:', fname 10 | exit() 11 | count = 0 12 | for line in fhand: 13 | if line.startswith('Subject:') : 14 | count = count + 1 15 | print 'There were', count, 'subject lines in', fname 16 | -------------------------------------------------------------------------------- /code/fahren.py: -------------------------------------------------------------------------------- 1 | inp = raw_input('Enter Fahrenheit Temperature:') 2 | fahr = float(inp) 3 | cel = (fahr - 32.0) * 5.0 / 9.0 4 | print cel 5 | -------------------------------------------------------------------------------- /code/geodata/README.txt: -------------------------------------------------------------------------------- 1 | Using the Google Geocoding API with a Database and 2 | Visualizing data on Google Map 3 | 4 | In this project, we are using the Google geocoding API 5 | to clean up some user-entered geographic locations of 6 | university names and then placing the data on a Google 7 | Map. 8 | 9 | You should install the SQLite browser to view and modify 10 | the databases from: 11 | 12 | http://sqlitebrowser.org/ 13 | 14 | The first problem to solve is that the Google geocoding 15 | API is rate limited to 2500 requests per day. So if you have 16 | a lot of data you might need to stop and restart the lookup 17 | process several times. So we break the problem into two 18 | phases. 19 | 20 | In the first phase we take our input data in the file 21 | (where.data) and read it one line at a time, and retrieve the 22 | geocoded response and store it in a database (geodata.sqlite). 23 | Before we use the geocoding API, we simply check to see if 24 | we already have the data for that particular line of input. 25 | 26 | You can re-start the process at any time by removing the file 27 | geodata.sqlite 28 | 29 | Run the geoload.py program.
This program will read the input 30 | lines in where.data and for each line check to see if it is already 31 | in the database and if we don't have the data for the location, 32 | call the geocoding API to retrieve the data and store it in 33 | the database. 34 | 35 | Here is a sample run after there is already some data in the 36 | database: 37 | 38 | Mac: python geoload.py 39 | Win: geoload.py 40 | 41 | Found in database Northeastern University 42 | 43 | Found in database University of Hong Kong, Illinois Institute of Technology, Bradley University 44 | 45 | Found in database Technion 46 | 47 | Found in database Viswakarma Institute, Pune, India 48 | 49 | Found in database UMD 50 | 51 | Found in database Tufts University 52 | 53 | Resolving Monash University 54 | Retrieving http://maps.googleapis.com/maps/api/geocode/json?sensor=false&address=Monash+University 55 | Retrieved 2063 characters { "results" : [ 56 | {u'status': u'OK', u'results': ... } 57 | 58 | Resolving Kokshetau Institute of Economics and Management 59 | Retrieving http://maps.googleapis.com/maps/api/geocode/json?sensor=false&address=Kokshetau+Institute+of+Economics+and+Management 60 | Retrieved 1749 characters { "results" : [ 61 | {u'status': u'OK', u'results': ... } 62 | 63 | The first five locations are already in the database and so they 64 | are skipped. The program scans to the point where it finds un-retrieved 65 | locations and starts retrieving them. 66 | 67 | The geoload.py program can be stopped at any time, and there is a counter 68 | that you can use to limit the number of calls to the geocoding 69 | API for each run. 70 | 71 | Once you have some data loaded into geodata.sqlite, you can 72 | visualize the data using the (geodump.py) program. This 73 | program reads the database and writes the file (where.js) 74 | with the location, latitude, and longitude in the form of 75 | executable JavaScript code.
76 | 77 | A run of the geodump.py program is as follows: 78 | 79 | Mac: python geodump.py 80 | Win: geodump.py 81 | 82 | Northeastern University, 360 Huntington Avenue, Boston, MA 02115, USA 42.3396998 -71.08975 83 | Bradley University, 1501 West Bradley Avenue, Peoria, IL 61625, USA 40.6963857 -89.6160811 84 | ... 85 | Technion, Viazman 87, Kesalsaba, 32000, Israel 32.7775 35.0216667 86 | Monash University Clayton Campus, Wellington Road, Clayton VIC 3800, Australia -37.9152113 145.134682 87 | Kokshetau, Kazakhstan 53.2833333 69.3833333 88 | ... 89 | 12 records written to where.js 90 | Open where.html to view the data in a browser 91 | 92 | The file (where.html) consists of HTML and JavaScript to visualize 93 | a Google Map. It reads the most recent data in where.js to get 94 | the data to be visualized. Here is the format of the where.js file: 95 | 96 | myData = [ 97 | [42.3396998,-71.08975, 'Northeastern University, 360 Huntington Avenue, Boston, MA 02115, USA'], 98 | [40.6963857,-89.6160811, 'Bradley University, 1501 West Bradley Avenue, Peoria, IL 61625, USA'], 99 | [32.7775,35.0216667, 'Technion, Viazman 87, Kesalsaba, 32000, Israel'], 100 | ... 101 | ]; 102 | 103 | This is a JavaScript list of lists. The syntax for JavaScript 104 | list constants is very similar to Python so the syntax should 105 | be familiar to you. 106 | 107 | Simply open where.html in a browser to see the locations. You 108 | can hover over each map pin to find the location that the 109 | geocoding API returned for the user-entered input. If you 110 | cannot see any data when you open the where.html file, you might 111 | want to check the JavaScript or developer console for your browser.
112 | 113 | -------------------------------------------------------------------------------- /code/geodata/geodump.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import json 3 | import codecs 4 | 5 | conn = sqlite3.connect('geodata.sqlite') 6 | cur = conn.cursor() 7 | 8 | cur.execute('SELECT * FROM Locations') 9 | fhand = codecs.open('where.js','w', "utf-8") 10 | fhand.write("myData = [\n") 11 | count = 0 12 | for row in cur : 13 | data = str(row[1]) 14 | try: js = json.loads(str(data)) 15 | except: continue 16 | 17 | if not('status' in js and js['status'] == 'OK') : continue 18 | 19 | lat = js["results"][0]["geometry"]["location"]["lat"] 20 | lng = js["results"][0]["geometry"]["location"]["lng"] 21 | if lat == 0 or lng == 0 : continue 22 | where = js['results'][0]['formatted_address'] 23 | where = where.replace("'","") 24 | try : 25 | print where, lat, lng 26 | 27 | count = count + 1 28 | if count > 1 : fhand.write(",\n") 29 | output = "["+str(lat)+","+str(lng)+", '"+where+"']" 30 | fhand.write(output) 31 | except: 32 | continue 33 | 34 | fhand.write("\n];\n") 35 | cur.close() 36 | fhand.close() 37 | print count, "records written to where.js" 38 | print "Open where.html to view the data in a browser" 39 | 40 | -------------------------------------------------------------------------------- /code/geodata/geoload.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import sqlite3 3 | import json 4 | import time 5 | import ssl 6 | 7 | serviceurl = "http://maps.googleapis.com/maps/api/geocode/json?" 
8 | 9 | # Deal with SSL certificate anomalies 10 | scontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1) 11 | 12 | conn = sqlite3.connect('geodata.sqlite') 13 | cur = conn.cursor() 14 | 15 | cur.execute(''' 16 | CREATE TABLE IF NOT EXISTS Locations (address TEXT, geodata TEXT)''') 17 | 18 | fh = open("where.data") 19 | count = 0 20 | for line in fh: 21 | if count > 200 : break 22 | address = line.strip() 23 | print '' 24 | cur.execute("SELECT geodata FROM Locations WHERE address= ?", (buffer(address), )) 25 | 26 | try: 27 | data = cur.fetchone()[0] 28 | print "Found in database ",address 29 | continue 30 | except: 31 | pass 32 | 33 | print 'Resolving', address 34 | url = serviceurl + urllib.urlencode({"sensor":"false", "address": address}) 35 | print 'Retrieving', url 36 | uh = urllib.urlopen(url, context=scontext) 37 | data = uh.read() 38 | print 'Retrieved',len(data),'characters',data[:20].replace('\n',' ') 39 | count = count + 1 40 | try: 41 | js = json.loads(str(data)) 42 | # print js # We print in case unicode causes an error 43 | except: 44 | continue 45 | 46 | if 'status' not in js or (js['status'] != 'OK' and js['status'] != 'ZERO_RESULTS') : 47 | print '==== Failure To Retrieve ====' 48 | print data 49 | break 50 | 51 | cur.execute('''INSERT INTO Locations (address, geodata) 52 | VALUES ( ?, ? )''', ( buffer(address),buffer(data) ) ) 53 | conn.commit() 54 | time.sleep(1) 55 | 56 | print "Run geodump.py to read the data from the database so you can vizualize it on a map." 57 | -------------------------------------------------------------------------------- /code/geodata/where.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | A Map of Information 6 | 7 | 8 | 9 | 10 | 39 | 40 | 41 |
42 |

About this Map

43 |

44 | This is a cool map from 45 | www.pythonlearn.com. 46 |

47 | 48 | 49 | -------------------------------------------------------------------------------- /code/geojson.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import json 3 | 4 | serviceurl = 'http://maps.googleapis.com/maps/api/geocode/json?' 5 | 6 | while True: 7 | address = raw_input('Enter location: ') 8 | if len(address) < 1 : break 9 | 10 | url = serviceurl + urllib.urlencode({'sensor':'false', 'address': address}) 11 | print 'Retrieving', url 12 | uh = urllib.urlopen(url) 13 | data = uh.read() 14 | print 'Retrieved',len(data),'characters' 15 | 16 | try: js = json.loads(str(data)) 17 | except: js = None 18 | if 'status' not in js or js['status'] != 'OK': 19 | print '==== Failure To Retrieve ====' 20 | print data 21 | continue 22 | 23 | print json.dumps(js, indent=4) 24 | 25 | lat = js["results"][0]["geometry"]["location"]["lat"] 26 | lng = js["results"][0]["geometry"]["location"]["lng"] 27 | print 'lat',lat,'lng',lng 28 | location = js['results'][0]['formatted_address'] 29 | print location 30 | 31 | -------------------------------------------------------------------------------- /code/geoxml.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import xml.etree.ElementTree as ET 3 | 4 | serviceurl = 'http://maps.googleapis.com/maps/api/geocode/xml?' 
5 | 6 | while True: 7 | address = raw_input('Enter location: ') 8 | if len(address) < 1 : break 9 | 10 | url = serviceurl + urllib.urlencode({'sensor':'false', 'address': address}) 11 | print 'Retrieving', url 12 | uh = urllib.urlopen(url) 13 | data = uh.read() 14 | print 'Retrieved',len(data),'characters' 15 | print data 16 | tree = ET.fromstring(data) 17 | 18 | 19 | results = tree.findall('result') 20 | lat = results[0].find('geometry').find('location').find('lat').text 21 | lng = results[0].find('geometry').find('location').find('lng').text 22 | location = results[0].find('formatted_address').text 23 | 24 | print 'lat',lat,'lng',lng 25 | print location 26 | -------------------------------------------------------------------------------- /code/gmane/gbasic.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import urllib 4 | import zlib 5 | 6 | howmany = int(raw_input("How many to dump? ")) 7 | 8 | conn = sqlite3.connect('index.sqlite') 9 | conn.text_factory = str 10 | cur = conn.cursor() 11 | 12 | cur.execute('SELECT id, sender FROM Senders') 13 | senders = dict() 14 | for message_row in cur : 15 | senders[message_row[0]] = message_row[1] 16 | 17 | cur.execute('SELECT id, subject FROM Subjects') 18 | subjects = dict() 19 | for message_row in cur : 20 | subjects[message_row[0]] = message_row[1] 21 | 22 | # cur.execute('SELECT id, guid,sender_id,subject_id,headers,body FROM Messages') 23 | cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 24 | messages = dict() 25 | for message_row in cur : 26 | messages[message_row[0]] = (message_row[1],message_row[2],message_row[3],message_row[4]) 27 | 28 | print "Loaded messages=",len(messages),"subjects=",len(subjects),"senders=",len(senders) 29 | 30 | sendcounts = dict() 31 | sendorgs = dict() 32 | for (message_id, message) in messages.items(): 33 | sender = message[1] 34 | sendcounts[sender] = sendcounts.get(sender,0) + 1 35 | 
pieces = senders[sender].split("@") 36 | if len(pieces) != 2 : continue 37 | dns = pieces[1] 38 | sendorgs[dns] = sendorgs.get(dns,0) + 1 39 | 40 | print '' 41 | print 'Top',howmany,'Email list participants' 42 | 43 | x = sorted(sendcounts, key=sendcounts.get, reverse=True) 44 | for k in x[:howmany]: 45 | print senders[k], sendcounts[k] 46 | if sendcounts[k] < 10 : break 47 | 48 | print '' 49 | print 'Top',howmany,'Email list organizations' 50 | 51 | x = sorted(sendorgs, key=sendorgs.get, reverse=True) 52 | for k in x[:howmany]: 53 | print k, sendorgs[k] 54 | if sendorgs[k] < 10 : break 55 | 56 | -------------------------------------------------------------------------------- /code/gmane/gline.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 19 | 20 | 21 |
22 | 23 | 24 | -------------------------------------------------------------------------------- /code/gmane/gline.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import urllib 4 | import zlib 5 | 6 | conn = sqlite3.connect('index.sqlite') 7 | conn.text_factory = str 8 | cur = conn.cursor() 9 | 10 | cur.execute('SELECT id, sender FROM Senders') 11 | senders = dict() 12 | for message_row in cur : 13 | senders[message_row[0]] = message_row[1] 14 | 15 | cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 16 | messages = dict() 17 | for message_row in cur : 18 | messages[message_row[0]] = (message_row[1],message_row[2],message_row[3],message_row[4]) 19 | 20 | print "Loaded messages=",len(messages),"senders=",len(senders) 21 | 22 | sendorgs = dict() 23 | for (message_id, message) in messages.items(): 24 | sender = message[1] 25 | pieces = senders[sender].split("@") 26 | if len(pieces) != 2 : continue 27 | dns = pieces[1] 28 | sendorgs[dns] = sendorgs.get(dns,0) + 1 29 | 30 | # pick the top schools 31 | orgs = sorted(sendorgs, key=sendorgs.get, reverse=True) 32 | orgs = orgs[:10] 33 | print "Top 10 Oranizations" 34 | print orgs 35 | 36 | counts = dict() 37 | months = list() 38 | # cur.execute('SELECT id, guid,sender_id,subject_id,sent_at FROM Messages') 39 | for (message_id, message) in messages.items(): 40 | sender = message[1] 41 | pieces = senders[sender].split("@") 42 | if len(pieces) != 2 : continue 43 | dns = pieces[1] 44 | if dns not in orgs : continue 45 | month = message[3][:7] 46 | if month not in months : months.append(month) 47 | key = (month, dns) 48 | counts[key] = counts.get(key,0) + 1 49 | 50 | months.sort() 51 | # print counts 52 | # print months 53 | 54 | fhand = open('gline.js','w') 55 | fhand.write("gline = [ ['Year'") 56 | for org in orgs: 57 | fhand.write(",'"+org+"'") 58 | fhand.write("]") 59 | 60 | for month in months: 61 | 
fhand.write(",\n['"+month+"'") 62 | for org in orgs: 63 | key = (month, org) 64 | val = counts.get(key,0) 65 | fhand.write(","+str(val)) 66 | fhand.write("]"); 67 | 68 | fhand.write("\n];\n") 69 | 70 | print "Output written to gline.js" 71 | -------------------------------------------------------------------------------- /code/gmane/gmane.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import ssl 4 | import urllib 5 | from urlparse import urljoin 6 | from urlparse import urlparse 7 | import re 8 | from datetime import datetime, timedelta 9 | # Not all systems have this 10 | try: 11 | import dateutil.parser as parser 12 | except: 13 | pass 14 | 15 | def parsemaildate(md) : 16 | # See if we have dateutil 17 | try: 18 | pdate = parser.parse(tdate) 19 | test_at = pdate.isoformat() 20 | return test_at 21 | except: 22 | pass 23 | 24 | # Non-dateutil version - we try our best 25 | 26 | pieces = md.split() 27 | notz = " ".join(pieces[:4]).strip() 28 | 29 | # Try a bunch of format variations - strptime() is *lame* 30 | dnotz = None 31 | for form in [ '%d %b %Y %H:%M:%S', '%d %b %Y %H:%M:%S', 32 | '%d %b %Y %H:%M', '%d %b %Y %H:%M', '%d %b %y %H:%M:%S', 33 | '%d %b %y %H:%M:%S', '%d %b %y %H:%M', '%d %b %y %H:%M' ] : 34 | try: 35 | dnotz = datetime.strptime(notz, form) 36 | break 37 | except: 38 | continue 39 | 40 | if dnotz is None : 41 | # print 'Bad Date:',md 42 | return None 43 | 44 | iso = dnotz.isoformat() 45 | 46 | tz = "+0000" 47 | try: 48 | tz = pieces[4] 49 | ival = int(tz) # Only want numeric timezone values 50 | if tz == '-0000' : tz = '+0000' 51 | tzh = tz[:3] 52 | tzm = tz[3:] 53 | tz = tzh+":"+tzm 54 | except: 55 | pass 56 | 57 | return iso+tz 58 | 59 | # Deal with SSL certificate anomalies 60 | scontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1) 61 | 62 | conn = sqlite3.connect('content.sqlite') 63 | cur = conn.cursor() 64 | conn.text_factory = str 65 | 66 | baseurl = 
"http://gmane.dr-chuck.net/gmane.comp.cms.sakai.devel/" 67 | 68 | cur.execute('''CREATE TABLE IF NOT EXISTS Messages 69 | (id INTEGER UNIQUE, email TEXT, sent_at TEXT, 70 | subject TEXT, headers TEXT, body TEXT)''') 71 | 72 | # This will be manually filled in 73 | cur.execute('''CREATE TABLE IF NOT EXISTS Mapping 74 | (old TEXT, new TEXT)''') 75 | 76 | # This will be manually filled in 77 | cur.execute('''CREATE TABLE IF NOT EXISTS DNSMapping 78 | (old TEXT, new TEXT)''') 79 | 80 | start = 0 81 | many = 0 82 | while True: 83 | if ( many < 1 ) : 84 | sval = raw_input('How many messages:') 85 | if ( len(sval) < 1 ) : break 86 | many = int(sval) 87 | 88 | start = start + 1 89 | cur.execute('SELECT id FROM Messages WHERE id=?', (start,) ) 90 | try: 91 | row = cur.fetchone() 92 | if row is not None : continue 93 | except: 94 | row = None 95 | 96 | many = many - 1 97 | url = baseurl + str(start) + '/' + str(start + 1) 98 | 99 | try: 100 | document = urllib.urlopen(url, context=scontext) 101 | text = document.read() 102 | if document.getcode() != 200 : 103 | print "Error code=",document.getcode(), url 104 | break 105 | except KeyboardInterrupt: 106 | print '' 107 | print 'Program interrupted by user...' 108 | break 109 | except: 110 | print "Unable to retrieve or parse page",url 111 | break 112 | 113 | print url,len(text) 114 | 115 | if not text.startswith("From "): 116 | print text 117 | print "End of mail stream reached..." 
118 | quit () 119 | 120 | pos = text.find("\n\n") 121 | if pos > 0 : 122 | hdr = text[:pos] 123 | body = text[pos+2:] 124 | else: 125 | print text 126 | print "Could not find break between headers and body" 127 | break 128 | 129 | email = None 130 | x = re.findall('\nFrom: .* <(\S+@\S+)>\n', hdr) 131 | if len(x) == 1 : 132 | email = x[0]; 133 | email = email.strip().lower() 134 | email = email.replace("<","") 135 | else: 136 | x = re.findall('\nFrom: (\S+@\S+)\n', hdr) 137 | if len(x) == 1 : 138 | email = x[0]; 139 | email = email.strip().lower() 140 | email = email.replace("<","") 141 | 142 | date = None 143 | y = re.findall('\Date: .*, (.*)\n', hdr) 144 | if len(y) == 1 : 145 | tdate = y[0] 146 | tdate = tdate[:26] 147 | try: 148 | sent_at = parsemaildate(tdate) 149 | except: 150 | print text 151 | print "Parse fail",tdate 152 | break 153 | 154 | subject = None 155 | z = re.findall('\Subject: (.*)\n', hdr) 156 | if len(z) == 1 : subject = z[0].strip().lower(); 157 | 158 | print " ",email,sent_at,subject 159 | cur.execute('''INSERT OR IGNORE INTO Messages (id, email, sent_at, subject, headers, body) 160 | VALUES ( ?, ?, ?, ?, ?, ? 
)''', ( start, email, sent_at, subject, hdr, body)) 161 | conn.commit() 162 | time.sleep(1) 163 | 164 | cur.close() 165 | 166 | -------------------------------------------------------------------------------- /code/gmane/gword.htm: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 37 | -------------------------------------------------------------------------------- /code/gmane/gword.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import time 3 | import urllib 4 | import zlib 5 | import string 6 | 7 | conn = sqlite3.connect('index.sqlite') 8 | conn.text_factory = str 9 | cur = conn.cursor() 10 | 11 | cur.execute('SELECT id, subject FROM Subjects') 12 | subjects = dict() 13 | for message_row in cur : 14 | subjects[message_row[0]] = message_row[1] 15 | 16 | # cur.execute('SELECT id, guid,sender_id,subject_id,headers,body FROM Messages') 17 | cur.execute('SELECT subject_id FROM Messages') 18 | counts = dict() 19 | for message_row in cur : 20 | text = subjects[message_row[0]] 21 | text = text.translate(None, string.punctuation) 22 | text = text.translate(None, '1234567890') 23 | text = text.strip() 24 | text = text.lower() 25 | words = text.split() 26 | for word in words: 27 | if len(word) < 4 : continue 28 | counts[word] = counts.get(word,0) + 1 29 | 30 | x = sorted(counts, key=counts.get, reverse=True) 31 | highest = None 32 | lowest = None 33 | for k in x[:100]: 34 | if highest is None or highest < counts[k] : 35 | highest = counts[k] 36 | if lowest is None or lowest > counts[k] : 37 | lowest = counts[k] 38 | print 'Range of counts:',highest,lowest 39 | 40 | # Spread the font sizes across 20-100 based on the count 41 | bigsize = 80 42 | smallsize = 20 43 | 44 | fhand = open('gword.js','w') 45 | fhand.write("gword = [") 46 | first = True 47 | for k in x[:100]: 48 | if not first : fhand.write( ",\n") 49 | first = False 50 | size = counts[k] 51 | size = 
(size - lowest) / float(highest - lowest) 52 | size = int((size * bigsize) + smallsize) 53 | fhand.write("{text: '"+k+"', size: "+str(size)+"}") 54 | fhand.write( "\n];\n") 55 | 56 | print "Output written to gword.js" 57 | -------------------------------------------------------------------------------- /code/grade.py: -------------------------------------------------------------------------------- 1 | inp = raw_input('Enter score: ') 2 | try: 3 | score = float(inp) 4 | except: 5 | score = -1 6 | 7 | if score > 1.0 or score < 0.0: 8 | print 'Bad score' 9 | elif score > 0.9: 10 | print 'A' 11 | elif score > 0.8: 12 | print 'B' 13 | elif score > 0.7: 14 | print 'C' 15 | elif score > 0.6: 16 | print 'D' 17 | else: 18 | print 'F' 19 | -------------------------------------------------------------------------------- /code/graphics/histogram.py: -------------------------------------------------------------------------------- 1 | import string 2 | from graphics import * 3 | 4 | fname = raw_input("Enter file name:") 5 | if len(fname) == 0 : 6 | print "Assuming mbox-short.txt" 7 | fname = "mbox-short.txt" 8 | infile = open(fname, "r") 9 | 10 | # Set up a 24 element list of zeros 11 | totals = [0] * 24; 12 | print totals; 13 | 14 | # Accumulate the times 15 | for line in infile: 16 | if line[0:5] == "From " : 17 | words = line.split() 18 | time = words[5] 19 | print "Time", time 20 | 21 | # Split time 22 | tsplit = time.split(':') 23 | try : 24 | hour = int(tsplit[0]) 25 | print "Hour", hour 26 | except: 27 | print "Hour not found" 28 | continue 29 | 30 | totals[hour] = totals[hour] + 1 31 | print totals 32 | 33 | bmax = max(totals) 34 | print "Maximum value", bmax 35 | 36 | ymax = ( int(bmax / 10) + 1 ) * 10 37 | 38 | print "Y-Axis Maximum", ymax 39 | 40 | win = GraphWin("Distribution of Commits "+fname, 600,400) 41 | win.setCoords(0,0,1,1) 42 | 43 | # Draw the X-Axis 44 | xaxis = Line(Point(0.1,0.1),Point(0.9,0.1)) 45 | xaxis.draw(win) 46 | 47 | # Label the X-Axis - we have 
24 hours (0-23) 48 | # so we need to know each slot's width 49 | width = 0.8 * (1.0 / 24.0) 50 | for i in range(24): 51 | center = (i * width) + (width / 2.0) + 0.1; 52 | txt = Text(Point(center, 0.066), str(i)) 53 | txt.draw(win) 54 | 55 | txt = Text(Point(0.5,0.033),"Hour of the Day"); 56 | txt.draw(win) 57 | 58 | # Draw the Y-Axis 59 | yaxis = Line(Point(0.1,0.1),Point(0.1,0.9)) 60 | yaxis.draw(win) 61 | 62 | # Label the Y-Axis 63 | # we will have 10 labels up to ymax 64 | unit = ymax / 10.0; 65 | for i in range(10) : 66 | center = 0.1 + (i + 1) * 0.08; 67 | value = int( (i + 1) * unit ) ; 68 | txt = Text(Point(0.066,center), str(value)) 69 | txt.draw(win) 70 | 71 | 72 | # Draw the bars 73 | for i in range(24): 74 | if totals[i] == 0: 75 | continue 76 | left = i * width + 0.1; 77 | right = i * width + width + 0.1; 78 | height = (float(totals[i]) / ymax) * 0.8; 79 | rec = Rectangle(Point(left,0.1), Point(right,0.1+height)) 80 | rec.setFill('blue') 81 | rec.draw(win) 82 | 83 | win.getMouse() 84 | -------------------------------------------------------------------------------- /code/greet.py: -------------------------------------------------------------------------------- 1 | name = raw_input('Enter your name:') 2 | print 'Hello', name 3 | -------------------------------------------------------------------------------- /code/grep.py: -------------------------------------------------------------------------------- 1 | # Search for lines that start with From and have an at sign 2 | import re 3 | hand = open('mbox.txt') 4 | search = raw_input('Enter a regular expression: ') 5 | count = 0 6 | for line in hand: 7 | line = line.rstrip() 8 | if re.search(search,line) : count = count + 1 9 | 10 | print 'mbox.txt had',count,'lines that matched',search 11 | -------------------------------------------------------------------------------- /code/hidden.py: -------------------------------------------------------------------------------- 1 | # Keep this file separate 2 | 3 | def 
oauth() : 4 | return { "consumer_key" : "h7Lu...Ng", 5 | "consumer_secret" : "dNKenAC3New...mmn7Q", 6 | "token_key" : "10185562-eibxCp9n2...P4GEQQOSGI", 7 | "token_secret" : "H0ycCFemmC4wyf1...qoIpBo" } 8 | -------------------------------------------------------------------------------- /code/json1.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | data = ''' 4 | { 5 | "name" : "Chuck", 6 | "phone" : { 7 | "type" : "intl", 8 | "number" : "+1 734 303 4456" 9 | }, 10 | "email" : { 11 | "hide" : "yes" 12 | } 13 | }''' 14 | 15 | info = json.loads(data) 16 | print 'Name:',info["name"] 17 | print 'Hide:',info["email"]["hide"] 18 | -------------------------------------------------------------------------------- /code/json2.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | input = ''' 4 | [ 5 | { "id" : "001", 6 | "x" : "2", 7 | "name" : "Chuck" 8 | } , 9 | { "id" : "009", 10 | "x" : "7", 11 | "name" : "Chuck" 12 | } 13 | ]''' 14 | 15 | info = json.loads(input) 16 | print 'User count:', len(info) 17 | 18 | for item in info: 19 | print 'Name', item['name'] 20 | print 'Id', item['id'] 21 | print 'Attribute', item['x'] 22 | 23 | -------------------------------------------------------------------------------- /code/largest.py: -------------------------------------------------------------------------------- 1 | largest = None 2 | print 'Before:', largest 3 | for iterval in [3, 41, 12, 9, 74, 15]: 4 | if largest == None or largest < iterval: 5 | largest = iterval 6 | print 'Loop:', iterval, largest 7 | print 'Largest:', largest 8 | 9 | -------------------------------------------------------------------------------- /code/mailcount.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter file name: ') 2 | fhand = open(fname) 3 | c = dict() 4 | for line in fhand: 5 | if not line.startswith('From ') : continue 
6 | pieces = line.split() 7 | email = pieces[1] 8 | c[email] = c.get(email,0) + 1 9 | 10 | print c 11 | -------------------------------------------------------------------------------- /code/mailtop.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter file name: ') 2 | fhand = open(fname) 3 | c = dict() 4 | for line in fhand: 5 | if not line.startswith('From ') : continue 6 | pieces = line.split() 7 | email = pieces[1] 8 | c[email] = c.get(email,0) + 1 9 | 10 | bigc = None 11 | bige = None 12 | for word in c: 13 | value = c[word] 14 | if bigc == None or value > bigc: 15 | bigw = word 16 | bigc = value 17 | 18 | print bigw, bigc 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /code/old-twitter/twdump.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('twdata.db') 4 | cur = conn.cursor() 5 | cur.execute('SELECT * FROM Twitter') 6 | count = 0 7 | for row in cur : 8 | print row 9 | count = count + 1 10 | print count, 'rows.' 
11 | cur.close() 12 | -------------------------------------------------------------------------------- /code/old-twitter/twfriends.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import urllib 3 | import xml.etree.ElementTree as ET 4 | 5 | TWITTER_URL = 'http://api.twitter.com/l/statuses/friends/ACCT.xml' 6 | 7 | conn = sqlite3.connect('twdata.db') 8 | cur = conn.cursor() 9 | 10 | cur.execute('''CREATE TABLE IF NOT EXISTS People 11 | (id INTEGER PRIMARY KEY, name TEXT UNIQUE, retrieved INTEGER)''') 12 | cur.execute('''CREATE TABLE IF NOT EXISTS Follows 13 | (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))''') 14 | 15 | while True: 16 | acct = raw_input('Enter a Twitter account, or quit: ') 17 | if ( acct == 'quit' ) : break 18 | if ( len(acct) < 1 ) : 19 | cur.execute('SELECT id, name FROM People WHERE retrieved = 0 LIMIT 1') 20 | try: 21 | (id, acct) = cur.fetchone() 22 | except: 23 | print 'No unretrieved Twitter accounts found' 24 | continue 25 | else: 26 | cur.execute('SELECT id FROM People WHERE name = ? LIMIT 1', 27 | (acct, ) ) 28 | try: 29 | id = cur.fetchone()[0] 30 | except: 31 | cur.execute('INSERT OR IGNORE INTO People (name, retrieved) VALUES ( ?, 0)', 32 | ( acct, ) ) 33 | conn.commit() 34 | if cur.rowcount != 1 : 35 | print 'Error inserting account:',acct 36 | continue 37 | id = cur.lastrowid 38 | 39 | url = TWITTER_URL.replace('ACCT', acct) 40 | print 'Retrieving', url 41 | document = urllib.urlopen (url).read() 42 | tree = ET.fromstring(document) 43 | 44 | cur.execute('UPDATE People SET retrieved=1 WHERE name = ?', (acct, ) ) 45 | 46 | countnew = 0 47 | countold = 0 48 | for user in tree.findall('user'): 49 | friend = user.find('screen_name').text 50 | cur.execute('SELECT id FROM People WHERE name = ? 
LIMIT 1', 51 | (friend, ) ) 52 | try: 53 | friend_id = cur.fetchone()[0] 54 | countold = countold + 1 55 | except: 56 | cur.execute('''INSERT OR IGNORE INTO People (name, retrieved) 57 | VALUES ( ?, 0)''', ( friend, ) ) 58 | conn.commit() 59 | if cur.rowcount != 1 : 60 | print 'Error inserting account:',friend 61 | continue 62 | friend_id = cur.lastrowid 63 | countnew = countnew + 1 64 | cur.execute('INSERT OR IGNORE INTO Follows (from_id, to_id) VALUES (?, ?)', 65 | (id, friend_id) ) 66 | print 'New accounts=',countnew,' revisited=',countold 67 | conn.commit() 68 | 69 | cur.close() 70 | 71 | -------------------------------------------------------------------------------- /code/old-twitter/twitter1.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | TWITTER_URL = 'http://api.twitter.com/1/statuses/friends/ACCT.xml' 4 | 5 | while True: 6 | print '' 7 | acct = raw_input('Enter Twitter Account:') 8 | if ( len(acct) < 1 ) : break 9 | url = TWITTER_URL.replace('ACCT', acct) 10 | print 'Retrieving', url 11 | document = urllib.urlopen (url).read() 12 | print document[:250] 13 | -------------------------------------------------------------------------------- /code/old-twitter/twitter2.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import xml.etree.ElementTree as ET 3 | 4 | TWITTER_URL = 'http://api.twitter.com/1/statuses/friends/ACCT.xml' 5 | 6 | while True: 7 | print '' 8 | acct = raw_input('Enter Twitter Account:') 9 | if ( len(acct) < 1 ) : break 10 | url = TWITTER_URL.replace('ACCT', acct) 11 | print 'Retrieving', url 12 | document = urllib.urlopen (url).read() 13 | print 'Retrieved', len(document), 'characters.' 
14 | tree = ET.fromstring(document) 15 | count = 0 16 | for user in tree.findall('user'): 17 | count = count + 1 18 | if count > 4 : break 19 | print user.find('screen_name').text 20 | status = user.find('status') 21 | if status is not None : 22 | txt = status.find('text').text 23 | print ' ',txt[:50] 24 | -------------------------------------------------------------------------------- /code/old-twitter/twitter3.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import xml.etree.ElementTree as ET 3 | 4 | TWITTER_URL = 'http://api.twitter.com/1/statuses/friends/ACCT.xml' 5 | 6 | while True: 7 | print '' 8 | acct = raw_input('Enter Twitter Account:') 9 | if ( len(acct) < 1 ) : break 10 | url = TWITTER_URL.replace('ACCT', acct) 11 | print 'Retrieving', url 12 | document = urllib.urlopen (url).read() 13 | print 'Retrieved', len(document), 'characters.' 14 | tree = ET.fromstring(document) 15 | count = 0 16 | for user in tree.findall('user'): 17 | count = count + 1 18 | if count > 4 : break 19 | print user.find('screen_name').text 20 | status = user.find('status') 21 | if status is not None : 22 | txt = status.find('text').text 23 | print ' ',txt[:50] 24 | -------------------------------------------------------------------------------- /code/old-twitter/twitter4.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import json 3 | 4 | TWITTER_URL = 'http://api.twitter.com/1/statuses/friends/ACCT.json' 5 | 6 | while True: 7 | print '' 8 | acct = raw_input('Enter Twitter Account:') 9 | if ( len(acct) < 1 ) : break 10 | url = TWITTER_URL.replace('ACCT', acct) 11 | print 'Retrieving', url 12 | document = urllib.urlopen (url).read() 13 | print 'Retrieved', len(document), 'characters.' 
14 | js = json.loads(document) 15 | count = 0 16 | for user in js: 17 | count = count + 1 18 | if count > 4 : break 19 | print user['screen_name'] 20 | status = user.get('status', None) 21 | if status is not None : 22 | txt = status['text'] 23 | print ' ',txt[:50] 24 | -------------------------------------------------------------------------------- /code/old-twitter/twjoin.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('twdata.db') 4 | cur = conn.cursor() 5 | 6 | cur.execute('SELECT * FROM People') 7 | count = 0 8 | print 'People:' 9 | for row in cur : 10 | if count < 5: print row 11 | count = count + 1 12 | print count, 'rows.' 13 | 14 | cur.execute('SELECT * FROM Follows') 15 | count = 0 16 | print 'Follows:' 17 | for row in cur : 18 | if count < 5: print row 19 | count = count + 1 20 | print count, 'rows.' 21 | 22 | cur.execute('''SELECT * FROM Follows JOIN People 23 | ON Follows.to_id = People.id WHERE Follows.from_id = 2''') 24 | count = 0 25 | print 'Connections for id=2:' 26 | for row in cur : 27 | if count < 5: print row 28 | count = count + 1 29 | print count, 'rows.' 
30 | 31 | cur.close() 32 | -------------------------------------------------------------------------------- /code/old-twitter/twspider.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import urllib 3 | import xml.etree.ElementTree as ET 4 | 5 | TWITTER_URL = 'http://api.twitter.com/l/statuses/friends/ACCT.xml' 6 | 7 | conn = sqlite3.connect('twdata.db') 8 | cur = conn.cursor() 9 | 10 | cur.execute(''' 11 | CREATE TABLE IF NOT EXISTS Twitter (name TEXT, retrieved INTEGER, friends INTEGER)''') 12 | 13 | while True: 14 | acct = raw_input('Enter a Twitter account, or quit: ') 15 | if ( acct == 'quit' ) : break 16 | if ( len(acct) < 1 ) : 17 | cur.execute('SELECT name FROM Twitter WHERE retrieved = 0 LIMIT 1') 18 | try: 19 | acct = cur.fetchone()[0] 20 | except: 21 | print 'No unretrieved Twitter accounts found' 22 | continue 23 | 24 | url = TWITTER_URL.replace('ACCT', acct) 25 | print 'Retrieving', url 26 | document = urllib.urlopen (url).read() 27 | tree = ET.fromstring(document) 28 | 29 | cur.execute('UPDATE Twitter SET retrieved=1 WHERE name = ?', (acct, ) ) 30 | 31 | countnew = 0 32 | countold = 0 33 | for user in tree.findall('user'): 34 | friend = user.find('screen_name').text 35 | cur.execute('SELECT friends FROM Twitter WHERE name = ? LIMIT 1', 36 | (friend, ) ) 37 | try: 38 | count = cur.fetchone()[0] 39 | cur.execute('UPDATE Twitter SET friends = ? 
WHERE name = ?', 40 | (count+1, friend) ) 41 | countold = countold + 1 42 | except: 43 | cur.execute('''INSERT INTO Twitter (name, retrieved, friends) 44 | VALUES ( ?, 0, 1 )''', ( friend, ) ) 45 | countnew = countnew + 1 46 | print 'New accounts=',countnew,' revisited=',countold 47 | conn.commit() 48 | 49 | cur.close() 50 | 51 | -------------------------------------------------------------------------------- /code/open.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox.txt') 2 | count = 0 3 | for line in fhand: 4 | count = count + 1 5 | print 'Line Count:', count 6 | 7 | -------------------------------------------------------------------------------- /code/pagerank/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Michael Bostock 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * The name Michael Bostock may not be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. 
IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /code/pagerank/README.txt: -------------------------------------------------------------------------------- 1 | Simple Python Search Spider, Page Ranker, and Visualizer 2 | 3 | This is a set of programs that emulate some of the functions of a 4 | search engine. They store their data in a SQLITE3 database named 5 | 'spider.sqlite'. This file can be removed at any time to restart the 6 | process. 7 | 8 | You should install the SQLite browser to view and modify 9 | the databases from: 10 | 11 | http://sqlitebrowser.org/ 12 | 13 | This program crawls a web site and pulls a series of pages into the 14 | database, recording the links between pages. 15 | 16 | Mac: rm spider.sqlite 17 | Mac: python spider.py 18 | 19 | Win: del spider.sqlite 20 | Win: spider.py 21 | 22 | Enter web url or enter: http://www.dr-chuck.com/ 23 | ['http://www.dr-chuck.com'] 24 | How many pages:2 25 | 1 http://www.dr-chuck.com/ 12 26 | 2 http://www.dr-chuck.com/csev-blog/ 57 27 | How many pages: 28 | 29 | In this sample run, we told it to crawl a website and retrieve two 30 | pages. If you restart the program again and tell it to crawl more 31 | pages, it will not re-crawl any pages already in the database. Upon 32 | restart it goes to a random non-crawled page and starts there. So 33 | each successive run of spider.py is additive. 
34 | 35 | Mac: python spider.py 36 | Win: spider.py 37 | 38 | Enter web url or enter: http://www.dr-chuck.com/ 39 | ['http://www.dr-chuck.com'] 40 | How many pages:3 41 | 3 http://www.dr-chuck.com/csev-blog 57 42 | 4 http://www.dr-chuck.com/dr-chuck/resume/speaking.htm 1 43 | 5 http://www.dr-chuck.com/dr-chuck/resume/index.htm 13 44 | How many pages: 45 | 46 | You can have multiple starting points in the same database - 47 | within the program these are called "webs". The spider 48 | chooses randomly amongst all non-visited links across all 49 | the webs. 50 | 51 | If you want to dump the contents of the spider.sqlite file, you can 52 | run spdump.py as follows: 53 | 54 | Mac: python spdump.py 55 | Win: spdump.py 56 | 57 | (5, None, 1.0, 3, u'http://www.dr-chuck.com/csev-blog') 58 | (3, None, 1.0, 4, u'http://www.dr-chuck.com/dr-chuck/resume/speaking.htm') 59 | (1, None, 1.0, 2, u'http://www.dr-chuck.com/csev-blog/') 60 | (1, None, 1.0, 5, u'http://www.dr-chuck.com/dr-chuck/resume/index.htm') 61 | 4 rows. 62 | 63 | This shows the number of incoming links, the old page rank, the new page 64 | rank, the id of the page, and the url of the page. The spdump.py program 65 | only shows pages that have at least one incoming link to them. 66 | 67 | Once you have a few pages in the database, you can run Page Rank on the 68 | pages using the sprank.py program. You simply tell it how many Page 69 | Rank iterations to run. 
70 | 71 | Mac: python sprank.py 72 | Win: sprank.py 73 | 74 | How many iterations:2 75 | 1 0.546848992536 76 | 2 0.226714939664 77 | [(1, 0.559), (2, 0.659), (3, 0.985), (4, 2.135), (5, 0.659)] 78 | 79 | You can dump the database again to see that page rank has been updated: 80 | 81 | Mac: python spdump.py 82 | Win: spdump.py 83 | 84 | (5, 1.0, 0.985, 3, u'http://www.dr-chuck.com/csev-blog') 85 | (3, 1.0, 2.135, 4, u'http://www.dr-chuck.com/dr-chuck/resume/speaking.htm') 86 | (1, 1.0, 0.659, 2, u'http://www.dr-chuck.com/csev-blog/') 87 | (1, 1.0, 0.659, 5, u'http://www.dr-chuck.com/dr-chuck/resume/index.htm') 88 | 4 rows. 89 | 90 | You can run sprank.py as many times as you like and it will simply refine 91 | the page rank the more times you run it. You can even run sprank.py a few times 92 | and then go spider a few more pages with spider.py and then run sprank.py 93 | to converge the page ranks. 94 | 95 | If you want to restart the Page Rank calculations without re-spidering the 96 | web pages, you can use spreset.py 97 | 98 | Mac: python spreset.py 99 | Win: spreset.py 100 | 101 | All pages set to a rank of 1.0 102 | 103 | Mac: python sprank.py 104 | Win: sprank.py 105 | 106 | How many iterations:50 107 | 1 0.546848992536 108 | 2 0.226714939664 109 | 3 0.0659516187242 110 | 4 0.0244199333 111 | 5 0.0102096489546 112 | 6 0.00610244329379 113 | ... 114 | 42 0.000109076928206 115 | 43 9.91987599002e-05 116 | 44 9.02151706798e-05 117 | 45 8.20451504471e-05 118 | 46 7.46150183837e-05 119 | 47 6.7857770908e-05 120 | 48 6.17124694224e-05 121 | 49 5.61236959327e-05 122 | 50 5.10410499467e-05 123 | [(512, 0.02963718031139026), (1, 12.790786721866658), (2, 28.939418898678284), (3, 6.808468390725946), (4, 13.469889092397006)] 124 | 125 | For each iteration of the page rank algorithm it prints the average 126 | change per page of the page rank. The network initially is quite 127 | unbalanced and so the individual page ranks are changing wildly. 
128 | But in a few short iterations, the page rank converges. You 129 | should run sprank.py long enough that the page ranks converge. 130 | 131 | If you want to visualize the current top pages in terms of page rank, 132 | run spjson.py to write the pages out in JSON format to be viewed in a 133 | web browser. 134 | 135 | Mac: python spjson.py 136 | Win: spjson.py 137 | 138 | Creating JSON output on spider.js... 139 | How many nodes? 30 140 | Open force.html in a browser to view the visualization 141 | 142 | You can view this data by opening the file force.html in your web browser. 143 | This shows an automatic layout of the nodes and links. You can click and 144 | drag any node and you can also double click on a node to find the URL 145 | that is represented by the node. 146 | 147 | This visualization is provided using the force layout from: 148 | 149 | http://mbostock.github.com/d3/ 150 | 151 | If you rerun the other utilities and then re-run spjson.py - you merely 152 | have to press refresh in the browser to get the new data from spider.js. 153 | 154 | -------------------------------------------------------------------------------- /code/pagerank/force.css: -------------------------------------------------------------------------------- 1 | circle.node { 2 | stroke: #fff; 3 | stroke-width: 1.5px; 4 | } 5 | 6 | line.link { 7 | stroke: #999; 8 | stroke-opacity: .6; 9 | } 10 | -------------------------------------------------------------------------------- /code/pagerank/force.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Force-Directed Layout 5 | 6 | 7 | 8 | 9 | 10 | 13 |
14 | 15 |

If you don't see a chart above, check the JavaScript console. You may 16 | need to use a different browser.

17 | 18 | 19 | -------------------------------------------------------------------------------- /code/pagerank/force.js: -------------------------------------------------------------------------------- 1 | var width = 600, 2 | height = 600; 3 | 4 | var color = d3.scale.category20(); 5 | 6 | var dist = (width + height) / 4; 7 | 8 | var force = d3.layout.force() 9 | .charge(-120) 10 | .linkDistance(dist) 11 | .size([width, height]); 12 | 13 | function getrank(rval) { 14 | return (rval/2.0) + 3; 15 | } 16 | 17 | function getcolor(rval) { 18 | return color(rval); 19 | } 20 | 21 | var svg = d3.select("#chart").append("svg") 22 | .attr("width", width) 23 | .attr("height", height); 24 | 25 | function loadData(json) { 26 | force 27 | .nodes(json.nodes) 28 | .links(json.links); 29 | 30 | var k = Math.sqrt(json.nodes.length / (width * height)); 31 | 32 | force 33 | .charge(-10 / k) 34 | .gravity(100 * k) 35 | .start(); 36 | 37 | var link = svg.selectAll("line.link") 38 | .data(json.links) 39 | .enter().append("line") 40 | .attr("class", "link") 41 | .style("stroke-width", function(d) { return Math.sqrt(d.value); }); 42 | 43 | var node = svg.selectAll("circle.node") 44 | .data(json.nodes) 45 | .enter().append("circle") 46 | .attr("class", "node") 47 | .attr("r", function(d) { return getrank(d.rank); } ) 48 | .style("fill", function(d) { return getcolor(d.rank); }) 49 | .on("dblclick",function(d) { 50 | if ( confirm('Do you want to open '+d.url) ) 51 | window.open(d.url,'_new',''); 52 | d3.event.stopPropagation(); 53 | }) 54 | .call(force.drag); 55 | 56 | node.append("title") 57 | .text(function(d) { return d.url; }); 58 | 59 | force.on("tick", function() { 60 | link.attr("x1", function(d) { return d.source.x; }) 61 | .attr("y1", function(d) { return d.source.y; }) 62 | .attr("x2", function(d) { return d.target.x; }) 63 | .attr("y2", function(d) { return d.target.y; }); 64 | 65 | node.attr("cx", function(d) { return d.x; }) 66 | .attr("cy", function(d) { return d.y; }); 67 | 
}); 68 | 69 | } 70 | loadData(spiderJson); 71 | -------------------------------------------------------------------------------- /code/pagerank/spdump.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | cur.execute('''SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url 7 | FROM Pages JOIN Links ON Pages.id = Links.to_id 8 | WHERE html IS NOT NULL 9 | GROUP BY id ORDER BY inbound DESC''') 10 | 11 | count = 0 12 | for row in cur : 13 | if count < 50 : print row 14 | count = count + 1 15 | print count, 'rows.' 16 | cur.close() 17 | -------------------------------------------------------------------------------- /code/pagerank/spider.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import urllib 3 | import ssl 4 | from urlparse import urljoin 5 | from urlparse import urlparse 6 | from BeautifulSoup import * 7 | 8 | # Deal with SSL certificate anomalies 9 | scontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1) 10 | 11 | conn = sqlite3.connect('spider.sqlite') 12 | cur = conn.cursor() 13 | 14 | cur.execute('''CREATE TABLE IF NOT EXISTS Pages 15 | (id INTEGER PRIMARY KEY, url TEXT UNIQUE, html TEXT, 16 | error INTEGER, old_rank REAL, new_rank REAL)''') 17 | 18 | cur.execute('''CREATE TABLE IF NOT EXISTS Links 19 | (from_id INTEGER, to_id INTEGER)''') 20 | 21 | cur.execute('''CREATE TABLE IF NOT EXISTS Webs (url TEXT UNIQUE)''') 22 | 23 | # Check to see if we are already in progress... 24 | cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1') 25 | row = cur.fetchone() 26 | if row is not None: 27 | print "Restarting existing crawl. Remove spider.sqlite to start a fresh crawl." 
28 | else : 29 | starturl = raw_input('Enter web url or enter: ') 30 | if ( len(starturl) < 1 ) : starturl = 'http://www.dr-chuck.com/' 31 | if ( starturl.endswith('/') ) : starturl = starturl[:-1] 32 | web = starturl 33 | if ( starturl.endswith('.htm') or starturl.endswith('.html') ) : 34 | pos = starturl.rfind('/') 35 | web = starturl[:pos] 36 | 37 | if ( len(web) > 1 ) : 38 | cur.execute('INSERT OR IGNORE INTO Webs (url) VALUES ( ? )', ( web, ) ) 39 | cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( starturl, ) ) 40 | conn.commit() 41 | 42 | # Get the current webs 43 | cur.execute('''SELECT url FROM Webs''') 44 | webs = list() 45 | for row in cur: 46 | webs.append(str(row[0])) 47 | 48 | print webs 49 | 50 | many = 0 51 | while True: 52 | if ( many < 1 ) : 53 | sval = raw_input('How many pages:') 54 | if ( len(sval) < 1 ) : break 55 | many = int(sval) 56 | many = many - 1 57 | 58 | cur.execute('SELECT id,url FROM Pages WHERE html is NULL and error is NULL ORDER BY RANDOM() LIMIT 1') 59 | try: 60 | row = cur.fetchone() 61 | # print row 62 | fromid = row[0] 63 | url = row[1] 64 | except: 65 | print 'No unretrieved HTML pages found' 66 | many = 0 67 | break 68 | 69 | print fromid, url, 70 | 71 | # If we are retrieving this page, there should be no links from it 72 | cur.execute('DELETE from Links WHERE from_id=?', (fromid, ) ) 73 | try: 74 | document = urllib.urlopen(url, context=scontext) 75 | html = document.read() 76 | if document.getcode() != 200 : 77 | cur.execute('UPDATE Pages SET error=? WHERE url=?', (document.getcode(), url) ) 78 | if 'text/html' != document.info().gettype() : 79 | print "Ignore non text/html page" 80 | cur.execute('DELETE FROM Pages WHERE url=?', ( url, ) ) 81 | cur.execute('UPDATE Pages SET error=0 WHERE url=?', (url, ) ) 82 | conn.commit() 83 | continue 84 | 85 | soup = BeautifulSoup(html) 86 | except KeyboardInterrupt: 87 | print '' 88 | print 'Program interrupted by user...' 
89 | break 90 | except: 91 | print "Unable to retrieve or parse page" 92 | cur.execute('UPDATE Pages SET error=-1 WHERE url=?', (url, ) ) 93 | conn.commit() 94 | continue 95 | 96 | cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( url, ) ) 97 | cur.execute('UPDATE Pages SET html=? WHERE url=?', (buffer(html), url ) ) 98 | conn.commit() 99 | 100 | # Retrieve all of the anchor tags 101 | tags = soup('a') 102 | count = 0 103 | for tag in tags: 104 | href = tag.get('href', None) 105 | if ( href is None ) : continue 106 | # Resolve relative references like href="/contact" 107 | up = urlparse(href) 108 | if ( len(up.scheme) < 1 ) : 109 | href = urljoin(url, href) 110 | ipos = href.find('#') 111 | if ( ipos > 1 ) : href = href[:ipos] 112 | if ( href.endswith('.png') or href.endswith('.jpg') or href.endswith('.gif') ) : continue 113 | if ( href.endswith('/') ) : href = href[:-1] 114 | # print href 115 | if ( len(href) < 1 ) : continue 116 | 117 | # Check if the URL is in any of the webs 118 | found = False 119 | for web in webs: 120 | if ( href.startswith(web) ) : 121 | found = True 122 | break 123 | if not found : continue 124 | 125 | cur.execute('INSERT OR IGNORE INTO Pages (url, html, new_rank) VALUES ( ?, NULL, 1.0 )', ( href, ) ) 126 | count = count + 1 127 | conn.commit() 128 | 129 | cur.execute('SELECT id FROM Pages WHERE url=? LIMIT 1', ( href, )) 130 | try: 131 | row = cur.fetchone() 132 | toid = row[0] 133 | except: 134 | print 'Could not retrieve id' 135 | continue 136 | # print fromid, toid 137 | cur.execute('INSERT OR IGNORE INTO Links (from_id, to_id) VALUES ( ?, ? 
)', ( fromid, toid ) ) 138 | 139 | 140 | print count 141 | 142 | cur.close() 143 | 144 | -------------------------------------------------------------------------------- /code/pagerank/spjson.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | print "Creating JSON output on spider.js..." 7 | howmany = int(raw_input("How many nodes? ")) 8 | 9 | cur.execute('''SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url 10 | FROM Pages JOIN Links ON Pages.id = Links.to_id 11 | WHERE html IS NOT NULL AND ERROR IS NULL 12 | GROUP BY id ORDER BY id,inbound''') 13 | 14 | fhand = open('spider.js','w') 15 | nodes = list() 16 | maxrank = None 17 | minrank = None 18 | for row in cur : 19 | nodes.append(row) 20 | rank = row[2] 21 | if maxrank < rank or maxrank is None : maxrank = rank 22 | if minrank > rank or minrank is None : minrank = rank 23 | if len(nodes) > howmany : break 24 | 25 | if maxrank == minrank or maxrank is None or minrank is None: 26 | print "Error - please run sprank.py to compute page rank" 27 | quit() 28 | 29 | fhand.write('spiderJson = {"nodes":[\n') 30 | count = 0 31 | map = dict() 32 | ranks = dict() 33 | for row in nodes : 34 | if count > 0 : fhand.write(',\n') 35 | # print row 36 | rank = row[2] 37 | rank = 19 * ( (rank - minrank) / (maxrank - minrank) ) 38 | fhand.write('{'+'"weight":'+str(row[0])+',"rank":'+str(rank)+',') 39 | fhand.write(' "id":'+str(row[3])+', "url":"'+row[4]+'"}') 40 | map[row[3]] = count 41 | ranks[row[3]] = rank 42 | count = count + 1 43 | fhand.write('],\n') 44 | 45 | cur.execute('''SELECT DISTINCT from_id, to_id FROM Links''') 46 | fhand.write('"links":[\n') 47 | 48 | count = 0 49 | for row in cur : 50 | # print row 51 | if row[0] not in map or row[1] not in map : continue 52 | if count > 0 : fhand.write(',\n') 53 | rank = ranks[row[0]] 54 | srank = 19 * ( (rank - minrank) / (maxrank - minrank) 
) 55 | fhand.write('{"source":'+str(map[row[0]])+',"target":'+str(map[row[1]])+',"value":3}') 56 | count = count + 1 57 | fhand.write(']};') 58 | fhand.close() 59 | cur.close() 60 | 61 | print "Open force.html in a browser to view the visualization" 62 | -------------------------------------------------------------------------------- /code/pagerank/sprank.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | # Find the ids that send out page rank - we only are interested 7 | # in pages in the SCC that have in and out links 8 | cur.execute('''SELECT DISTINCT from_id FROM Links''') 9 | from_ids = list() 10 | for row in cur: 11 | from_ids.append(row[0]) 12 | 13 | # Find the ids that receive page rank 14 | to_ids = list() 15 | links = list() 16 | cur.execute('''SELECT DISTINCT from_id, to_id FROM Links''') 17 | for row in cur: 18 | from_id = row[0] 19 | to_id = row[1] 20 | if from_id == to_id : continue 21 | if from_id not in from_ids : continue 22 | if to_id not in from_ids : continue 23 | links.append(row) 24 | if to_id not in to_ids : to_ids.append(to_id) 25 | 26 | # Get latest page ranks for strongly connected component 27 | prev_ranks = dict() 28 | for node in from_ids: 29 | cur.execute('''SELECT new_rank FROM Pages WHERE id = ?''', (node, )) 30 | row = cur.fetchone() 31 | prev_ranks[node] = row[0] 32 | 33 | sval = raw_input('How many iterations:') 34 | many = 1 35 | if ( len(sval) > 0 ) : many = int(sval) 36 | 37 | # Sanity check 38 | if len(prev_ranks) < 1 : 39 | print "Nothing to page rank. Check data." 
40 | quit() 41 | 42 | # Lets do Page Rank in memory so it is really fast 43 | for i in range(many): 44 | # print prev_ranks.items()[:5] 45 | next_ranks = dict(); 46 | total = 0.0 47 | for (node, old_rank) in prev_ranks.items(): 48 | total = total + old_rank 49 | next_ranks[node] = 0.0 50 | # print total 51 | 52 | # Find the number of outbound links and sent the page rank down each 53 | for (node, old_rank) in prev_ranks.items(): 54 | # print node, old_rank 55 | give_ids = list() 56 | for (from_id, to_id) in links: 57 | if from_id != node : continue 58 | # print ' ',from_id,to_id 59 | 60 | if to_id not in to_ids: continue 61 | give_ids.append(to_id) 62 | if ( len(give_ids) < 1 ) : continue 63 | amount = old_rank / len(give_ids) 64 | # print node, old_rank,amount, give_ids 65 | 66 | for id in give_ids: 67 | next_ranks[id] = next_ranks[id] + amount 68 | 69 | newtot = 0 70 | for (node, next_rank) in next_ranks.items(): 71 | newtot = newtot + next_rank 72 | evap = (total - newtot) / len(next_ranks) 73 | 74 | # print newtot, evap 75 | for node in next_ranks: 76 | next_ranks[node] = next_ranks[node] + evap 77 | 78 | newtot = 0 79 | for (node, next_rank) in next_ranks.items(): 80 | newtot = newtot + next_rank 81 | 82 | # Compute the per-page average change from old rank to new rank 83 | # As indication of convergence of the algorithm 84 | totdiff = 0 85 | for (node, old_rank) in prev_ranks.items(): 86 | new_rank = next_ranks[node] 87 | diff = abs(old_rank-new_rank) 88 | totdiff = totdiff + diff 89 | 90 | avediff = totdiff / len(prev_ranks) 91 | print i+1, avediff 92 | 93 | # rotate 94 | prev_ranks = next_ranks 95 | 96 | # Put the final ranks back into the database 97 | print next_ranks.items()[:5] 98 | cur.execute('''UPDATE Pages SET old_rank=new_rank''') 99 | for (id, new_rank) in next_ranks.items() : 100 | cur.execute('''UPDATE Pages SET new_rank=? 
WHERE id=?''', (new_rank, id)) 101 | conn.commit() 102 | cur.close() 103 | 104 | -------------------------------------------------------------------------------- /code/pagerank/spreset.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | 6 | cur.execute('''UPDATE Pages SET new_rank=1.0, old_rank=0.0''') 7 | conn.commit() 8 | 9 | cur.close() 10 | 11 | print "All pages set to a rank of 1.0" 12 | -------------------------------------------------------------------------------- /code/pals.py: -------------------------------------------------------------------------------- 1 | friends = ['Joseph', 'Glenn', 'Sally'] 2 | for friend in friends: 3 | print 'Happy New Year:', friend 4 | print 'Done!' 5 | 6 | -------------------------------------------------------------------------------- /code/pay.py: -------------------------------------------------------------------------------- 1 | inp = raw_input('Enter Hours: ') 2 | hours = float(inp) 3 | inp = raw_input('Enter Rate: ') 4 | rate = float(inp) 5 | pay = hours * rate 6 | print 'Pay:', pay 7 | -------------------------------------------------------------------------------- /code/pay2.py: -------------------------------------------------------------------------------- 1 | inp = raw_input('Enter Hours: ') 2 | hours = float(inp) 3 | inp = raw_input('Enter Rate: ') 4 | rate = float(inp) 5 | if hours > 40: 6 | pay = hours * rate + (hours - 40) * rate * 0.5 7 | else: 8 | pay = hours * rate 9 | print 'Pay:', pay 10 | -------------------------------------------------------------------------------- /code/pay3.py: -------------------------------------------------------------------------------- 1 | try: 2 | inp = raw_input('Enter Hours: ') 3 | hours = float(inp) 4 | inp = raw_input('Enter Rate: ') 5 | rate = float(inp) 6 | if hours > 40: 7 | pay = hours * rate + (hours - 40) * rate * 1.5 8 | else: 9 | pay = 
hours * rate 10 | print 'Pay:', pay 11 | except: 12 | print 'Error, please enter numeric input' 13 | -------------------------------------------------------------------------------- /code/re01.py: -------------------------------------------------------------------------------- 1 | # Search for lines that contain 'From:' anywhere in the line 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | if re.search('From:', line) : 7 | print line 8 | 9 | -------------------------------------------------------------------------------- /code/re02.py: -------------------------------------------------------------------------------- 1 | # Search for lines that start with 'From:' 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | if re.search('^From:', line) : 7 | print line 8 | 9 | -------------------------------------------------------------------------------- /code/re03.py: -------------------------------------------------------------------------------- 1 | # Search for lines that start with 'F', followed by any 2 characters, followed by 'm:' 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | if re.search('^F..m:', line) : 7 | print line 8 | 9 | -------------------------------------------------------------------------------- /code/re04.py: -------------------------------------------------------------------------------- 1 | # Search for lines that start with 'From:' and have an at sign later in the line 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | if re.search('^From:.+@', line) : 7 | print line 8 | 9 | -------------------------------------------------------------------------------- /code/re05.py: -------------------------------------------------------------------------------- 1 | import re 2 | s = 'Hello this is a message from csev@umich.edu to cwen@iupui.edu about the meeting @2PM' 3 | lst = re.findall('\S+@\S+', s)
4 | print lst 5 | 6 | -------------------------------------------------------------------------------- /code/re06.py: -------------------------------------------------------------------------------- 1 | # Extract every substring that has an at sign between runs of non-whitespace characters 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | x = re.findall('\S+@\S+', line) 7 | if len(x) > 0 : 8 | print x 9 | 10 | -------------------------------------------------------------------------------- /code/re07.py: -------------------------------------------------------------------------------- 1 | # Extract substrings with an at sign that start with a letter or digit and end with a letter 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | x = re.findall('[a-zA-Z0-9]\S+@\S+[a-zA-Z]', line) 7 | if len(x) > 0 : 8 | print x 9 | 10 | -------------------------------------------------------------------------------- /code/re08.py: -------------------------------------------------------------------------------- 1 | # For lines that start with 'X', any non-whitespace characters and ': ', extract the first word after the colon 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | x = re.findall('^X\S*: (\S+)', line) 7 | if not x : continue 8 | print x 9 | 10 | -------------------------------------------------------------------------------- /code/re09.py: -------------------------------------------------------------------------------- 1 | # Search for lines that start with 'X', any non-whitespace characters and ': ', followed by a number (digits and '.') 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | if re.search('^X\S*: [0-9.]+', line) : 7 | print line 8 | 9 | -------------------------------------------------------------------------------- /code/re10.py: -------------------------------------------------------------------------------- 1 | # For lines that start with 'X', any non-whitespace characters and ': ', extract the number (digits and '.') that follows 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in
hand: 5 | line = line.rstrip() 6 | x = re.findall('^X\S*: ([0-9.]+)', line) 7 | if len(x) > 0 : 8 | print x 9 | 10 | -------------------------------------------------------------------------------- /code/re11.py: -------------------------------------------------------------------------------- 1 | # For lines that start with 'Details:', extract the number (digits and '.') that follows 'rev=' 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | x = re.findall('^Details:.*rev=([0-9.]+)', line) 7 | if len(x) > 0: 8 | print x 9 | 10 | -------------------------------------------------------------------------------- /code/re12.py: -------------------------------------------------------------------------------- 1 | # For lines that start with 'From ', extract the two digits that follow a space and precede a ':' 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | x = re.findall('^From .* ([0-9][0-9]):', line) 7 | if len(x) > 0 : print x 8 | 9 | -------------------------------------------------------------------------------- /code/re13.py: -------------------------------------------------------------------------------- 1 | # For lines that contain 'Author:', extract the non-whitespace characters that follow the at sign 2 | import re 3 | hand = open('mbox-short.txt') 4 | for line in hand: 5 | line = line.rstrip() 6 | x = re.findall('Author:.*@(\S+)', line) 7 | if not x : continue 8 | print x 9 | 10 | -------------------------------------------------------------------------------- /code/re14.py: -------------------------------------------------------------------------------- 1 | # Collect the numbers that follow 'New Revision: ' and print how many were found and their average 2 | import re 3 | fname = raw_input('Enter file:') 4 | hand = open(fname) 5 | nums = list() 6 | for line in hand: 7 | line = line.rstrip() 8 | x = re.findall('New Revision: ([0-9]+)', line) 9 | if len(x) == 1 : 10 | val = float(x[0]) 11 | nums.append(val) 12 | print len(nums) 13 | print sum(nums)/len(nums) 14 | 15 | 
-------------------------------------------------------------------------------- /code/romeo.txt: -------------------------------------------------------------------------------- 1 | But soft what light through yonder window breaks 2 | It is the east and Juliet is the sun 3 | Arise fair sun and kill the envious moon 4 | Who is already sick and pale with grief 5 | -------------------------------------------------------------------------------- /code/search1.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | count = 0 3 | for line in fhand: 4 | if line.startswith('From:') : 5 | print line 6 | -------------------------------------------------------------------------------- /code/search10.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | for line in fhand: 3 | words = line.split() 4 | # print 'Debug:', words 5 | if len(words) == 0 : continue 6 | if words[0] != 'From' : continue 7 | print words[2] 8 | -------------------------------------------------------------------------------- /code/search2.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | for line in fhand: 3 | line = line.rstrip() 4 | if line.startswith('From:') : 5 | print line 6 | -------------------------------------------------------------------------------- /code/search3.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | for line in fhand: 3 | line = line.rstrip() 4 | # Skip 'uninteresting lines' 5 | if not line.startswith('From:') : 6 | continue 7 | # Process our 'interesting' line 8 | print line 9 | -------------------------------------------------------------------------------- /code/search4.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | for 
line in fhand: 3 | line = line.rstrip() 4 | if line.find('@uct.ac.za') == -1 : continue 5 | print line 6 | -------------------------------------------------------------------------------- /code/search5.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | for line in fhand: 3 | line = line.rstrip() 4 | if not line.startswith('From ') : continue 5 | words = line.split() 6 | print words[2] 7 | -------------------------------------------------------------------------------- /code/search6.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter the file name: ') 2 | fhand = open(fname) 3 | count = 0 4 | for line in fhand: 5 | if line.startswith('Subject:') : 6 | count = count + 1 7 | print 'There were', count, 'subject lines in', fname 8 | -------------------------------------------------------------------------------- /code/search7.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter the file name: ') 2 | try: 3 | fhand = open(fname) 4 | except: 5 | print 'File cannot be opened:', fname 6 | exit() 7 | count = 0 8 | for line in fhand: 9 | if line.startswith('Subject:') : 10 | count = count + 1 11 | print 'There were', count, 'subject lines in', fname 12 | -------------------------------------------------------------------------------- /code/search8.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | count = 0 3 | for line in fhand: 4 | words = line.split() 5 | if words[0] != 'From' : continue 6 | print words[2] 7 | -------------------------------------------------------------------------------- /code/search9.py: -------------------------------------------------------------------------------- 1 | fhand = open('mbox-short.txt') 2 | count = 0 3 | for line in fhand: 4 | words = line.split() 5 | print 'Debug:', words 6 
| if words[0] != 'From' : continue 7 | print words[2] 8 | -------------------------------------------------------------------------------- /code/sequence.py: -------------------------------------------------------------------------------- 1 | inp = raw_input('Enter a Number:') 2 | n = int(inp) 3 | while n != 1: 4 | print n, # Use comma to suppress newline 5 | if n%2 == 0: # n is even 6 | n = n/2 7 | else: # n is odd 8 | n = n*3+1 9 | -------------------------------------------------------------------------------- /code/socket1.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 4 | mysock.connect(('data.pr4e.org', 80)) 5 | mysock.send('GET http://data.pr4e.org/romeo.txt HTTP/1.0\r\n\r\n') 6 | 7 | while True: 8 | data = mysock.recv(512) 9 | if ( len(data) < 1 ) : 10 | break 11 | print data; 12 | 13 | mysock.close() 14 | -------------------------------------------------------------------------------- /code/socket2.py: -------------------------------------------------------------------------------- 1 | import socket 2 | 3 | url = raw_input('Enter: ') 4 | words = url.split('/') 5 | host = words[2] 6 | 7 | mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 8 | mysock.connect((host, 80)) 9 | mysock.send('GET '+url+' HTTP/1.0\n\n') 10 | 11 | while True: 12 | data = mysock.recv(512) 13 | if ( len(data) < 1 ) : 14 | break 15 | print data, 16 | 17 | mysock.close() 18 | 19 | -------------------------------------------------------------------------------- /code/soft.py: -------------------------------------------------------------------------------- 1 | txt = 'but soft what light in yonder window breaks' 2 | words = txt.split() 3 | t = list() 4 | for word in words: 5 | t.append((len(word), word)) 6 | 7 | t.sort(reverse=True) 8 | 9 | res = list() 10 | for length, word in t: 11 | res.append(word) 12 | 13 | print res 14 | 
-------------------------------------------------------------------------------- /code/spamave.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter the file name: ') 2 | try: 3 | fhand = open(fname) 4 | except: 5 | print 'File cannot be opened:', fname 6 | exit() 7 | count = 0 8 | total = 0 9 | for line in fhand: 10 | words = line.split() 11 | if len(words) != 2 : continue 12 | if words[0] != 'X-DSPAM-Confidence:' : continue 13 | try: 14 | conf = float(words[1]) 15 | except: 16 | continue 17 | count = count + 1 18 | total = total + conf 19 | average = total / count 20 | print 'Average spam confidence:', average 21 | -------------------------------------------------------------------------------- /code/twdump.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('spider.sqlite') 4 | cur = conn.cursor() 5 | cur.execute('SELECT * FROM Twitter') 6 | count = 0 7 | for row in cur : 8 | print row 9 | count = count + 1 10 | print count, 'rows.' 
11 | cur.close() 12 | -------------------------------------------------------------------------------- /code/twfriends.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import twurl 3 | import json 4 | import sqlite3 5 | 6 | TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json' 7 | 8 | conn = sqlite3.connect('friends.sqlite') 9 | cur = conn.cursor() 10 | 11 | cur.execute('''CREATE TABLE IF NOT EXISTS People 12 | (id INTEGER PRIMARY KEY, name TEXT UNIQUE, retrieved INTEGER)''') 13 | cur.execute('''CREATE TABLE IF NOT EXISTS Follows 14 | (from_id INTEGER, to_id INTEGER, UNIQUE(from_id, to_id))''') 15 | 16 | while True: 17 | acct = raw_input('Enter a Twitter account, or quit: ') 18 | if ( acct == 'quit' ) : break 19 | if ( len(acct) < 1 ) : 20 | cur.execute('SELECT id, name FROM People WHERE retrieved = 0 LIMIT 1') 21 | try: 22 | (id, acct) = cur.fetchone() 23 | except: 24 | print 'No unretrieved Twitter accounts found' 25 | continue 26 | else: 27 | cur.execute('SELECT id FROM People WHERE name = ? LIMIT 1', 28 | (acct, ) ) 29 | try: 30 | id = cur.fetchone()[0] 31 | except: 32 | cur.execute('INSERT OR IGNORE INTO People (name, retrieved) VALUES ( ?, 0)', 33 | ( acct, ) ) 34 | conn.commit() 35 | if cur.rowcount != 1 : 36 | print 'Error inserting account:',acct 37 | continue 38 | id = cur.lastrowid 39 | 40 | url = twurl.augment(TWITTER_URL, {'screen_name': acct, 'count': '5'} ) 41 | print 'Retrieving account', acct 42 | connection = urllib.urlopen(url) 43 | data = connection.read() 44 | headers = connection.info().dict 45 | print 'Remaining', headers['x-rate-limit-remaining'] 46 | 47 | js = json.loads(data) 48 | # print json.dumps(js, indent=4) 49 | 50 | cur.execute('UPDATE People SET retrieved=1 WHERE name = ?', (acct, ) ) 51 | 52 | countnew = 0 53 | countold = 0 54 | for u in js['users'] : 55 | friend = u['screen_name'] 56 | print friend 57 | cur.execute('SELECT id FROM People WHERE name = ? 
LIMIT 1', 58 | (friend, ) ) 59 | try: 60 | friend_id = cur.fetchone()[0] 61 | countold = countold + 1 62 | except: 63 | cur.execute('''INSERT OR IGNORE INTO People (name, retrieved) 64 | VALUES ( ?, 0)''', ( friend, ) ) 65 | conn.commit() 66 | if cur.rowcount != 1 : 67 | print 'Error inserting account:',friend 68 | continue 69 | friend_id = cur.lastrowid 70 | countnew = countnew + 1 71 | cur.execute('INSERT OR IGNORE INTO Follows (from_id, to_id) VALUES (?, ?)', 72 | (id, friend_id) ) 73 | print 'New accounts=',countnew,' revisited=',countold 74 | conn.commit() 75 | 76 | cur.close() 77 | 78 | -------------------------------------------------------------------------------- /code/twitter1.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import twurl 3 | 4 | TWITTER_URL = 'https://api.twitter.com/1.1/statuses/user_timeline.json' 5 | 6 | while True: 7 | print '' 8 | acct = raw_input('Enter Twitter Account:') 9 | if ( len(acct) < 1 ) : break 10 | url = twurl.augment(TWITTER_URL, 11 | {'screen_name': acct, 'count': '2'} ) 12 | print 'Retrieving', url 13 | connection = urllib.urlopen(url) 14 | data = connection.read() 15 | print data[:250] 16 | headers = connection.info().dict 17 | # print headers 18 | print 'Remaining', headers['x-rate-limit-remaining'] 19 | -------------------------------------------------------------------------------- /code/twitter2.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import twurl 3 | import json 4 | 5 | TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json' 6 | 7 | while True: 8 | print '' 9 | acct = raw_input('Enter Twitter Account:') 10 | if ( len(acct) < 1 ) : break 11 | url = twurl.augment(TWITTER_URL, 12 | {'screen_name': acct, 'count': '5'} ) 13 | print 'Retrieving', url 14 | connection = urllib.urlopen(url) 15 | data = connection.read() 16 | headers = connection.info().dict 17 | print 'Remaining', 
headers['x-rate-limit-remaining'] 18 | js = json.loads(data) 19 | print json.dumps(js, indent=4) 20 | 21 | for u in js['users'] : 22 | print u['screen_name'] 23 | s = u['status']['text'] 24 | print ' ',s[:50] 25 | -------------------------------------------------------------------------------- /code/twjoin.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | conn = sqlite3.connect('friends.sqlite') 4 | cur = conn.cursor() 5 | 6 | cur.execute('SELECT * FROM People') 7 | count = 0 8 | print 'People:' 9 | for row in cur : 10 | if count < 5: print row 11 | count = count + 1 12 | print count, 'rows.' 13 | 14 | cur.execute('SELECT * FROM Follows') 15 | count = 0 16 | print 'Follows:' 17 | for row in cur : 18 | if count < 5: print row 19 | count = count + 1 20 | print count, 'rows.' 21 | 22 | cur.execute('''SELECT * FROM Follows JOIN People 23 | ON Follows.to_id = People.id WHERE Follows.from_id = 2''') 24 | count = 0 25 | print 'Connections for id=2:' 26 | for row in cur : 27 | if count < 5: print row 28 | count = count + 1 29 | print count, 'rows.' 
30 | 31 | cur.close() 32 | -------------------------------------------------------------------------------- /code/twspider.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import twurl 3 | import json 4 | import sqlite3 5 | 6 | TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json' 7 | 8 | conn = sqlite3.connect('spider.sqlite') 9 | cur = conn.cursor() 10 | 11 | cur.execute(''' 12 | CREATE TABLE IF NOT EXISTS Twitter (name TEXT, retrieved INTEGER, friends INTEGER)''') 13 | 14 | while True: 15 | acct = raw_input('Enter a Twitter account, or quit: ') 16 | if ( acct == 'quit' ) : break 17 | if ( len(acct) < 1 ) : 18 | cur.execute('SELECT name FROM Twitter WHERE retrieved = 0 LIMIT 1') 19 | try: 20 | acct = cur.fetchone()[0] 21 | except: 22 | print 'No unretrieved Twitter accounts found' 23 | continue 24 | 25 | url = twurl.augment(TWITTER_URL, {'screen_name': acct, 'count': '5'} ) 26 | print 'Retrieving', url 27 | connection = urllib.urlopen(url) 28 | data = connection.read() 29 | headers = connection.info().dict 30 | print 'Remaining', headers['x-rate-limit-remaining'] 31 | js = json.loads(data) 32 | # print json.dumps(js, indent=4) 33 | 34 | cur.execute('UPDATE Twitter SET retrieved=1 WHERE name = ?', (acct, ) ) 35 | 36 | countnew = 0 37 | countold = 0 38 | for u in js['users'] : 39 | friend = u['screen_name'] 40 | print friend 41 | cur.execute('SELECT friends FROM Twitter WHERE name = ? LIMIT 1', 42 | (friend, ) ) 43 | try: 44 | count = cur.fetchone()[0] 45 | cur.execute('UPDATE Twitter SET friends = ? 
WHERE name = ?', 46 | (count+1, friend) ) 47 | countold = countold + 1 48 | except: 49 | cur.execute('''INSERT INTO Twitter (name, retrieved, friends) 50 | VALUES ( ?, 0, 1 )''', ( friend, ) ) 51 | countnew = countnew + 1 52 | print 'New accounts=',countnew,' revisited=',countold 53 | conn.commit() 54 | 55 | cur.close() 56 | 57 | -------------------------------------------------------------------------------- /code/twurl.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | import oauth 3 | import hidden 4 | 5 | def augment(url, parameters) : 6 | secrets = hidden.oauth() 7 | consumer = oauth.OAuthConsumer(secrets['consumer_key'], secrets['consumer_secret']) 8 | token = oauth.OAuthToken(secrets['token_key'],secrets['token_secret']) 9 | 10 | oauth_request = oauth.OAuthRequest.from_consumer_and_token(consumer, 11 | token=token, http_method='GET', http_url=url, parameters=parameters) 12 | oauth_request.sign_request(oauth.OAuthSignatureMethod_HMAC_SHA1(), consumer, token) 13 | return oauth_request.to_url() 14 | 15 | 16 | def test_me() : 17 | print '* Calling Twitter...' 
18 | url = augment('https://api.twitter.com/1.1/statuses/user_timeline.json', 19 | {'screen_name': 'drchuck', 'count': '2'} ) 20 | print url 21 | connection = urllib.urlopen(url) 22 | data = connection.read() 23 | print data 24 | headers = connection.info().dict 25 | print headers 26 | -------------------------------------------------------------------------------- /code/txtcheck.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | for (dirname, dirs, files) in os.walk('.'): 4 | for filename in files: 5 | if filename.endswith('.txt') : 6 | thefile = os.path.join(dirname,filename) 7 | size = os.path.getsize(thefile) 8 | if size == 2578 or size == 2565: 9 | continue 10 | fhand = open(thefile,'r') 11 | lines = list() 12 | for line in fhand: 13 | lines.append(line) 14 | fhand.close() 15 | if len(lines) > 1: 16 | print len(lines), thefile 17 | print lines[:4] 18 | -------------------------------------------------------------------------------- /code/txtcheck2.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | for (dirname, dirs, files) in os.walk('.'): 4 | for filename in files: 5 | if filename.endswith('.txt') : 6 | thefile = os.path.join(dirname,filename) 7 | size = os.path.getsize(thefile) 8 | if size == 2578 or size == 2565: 9 | continue 10 | fhand = open(thefile,'r') 11 | lines = list() 12 | for line in fhand: 13 | lines.append(line) 14 | fhand.close() 15 | if len(lines) == 3 and lines[2].startswith('Sent from my iPhone') : 16 | continue 17 | if len(lines) > 1: 18 | print len(lines), thefile 19 | print lines[:4] 20 | -------------------------------------------------------------------------------- /code/txtcheck3.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | for (dirname, dirs, files) in os.walk('.'): 4 | for filename in 
files: 5 | if filename.endswith('.txt') : 6 | thefile = os.path.join(dirname,filename) 7 | size = os.path.getsize(thefile) 8 | if size == 2578 or size == 2565: 9 | print 'T-Mobile:',thefile 10 | continue 11 | fhand = open(thefile,'r') 12 | lines = list() 13 | for line in fhand: 14 | lines.append(line) 15 | fhand.close() 16 | if len(lines) == 3 and lines[2].startswith('Sent from my iPhone') : 17 | print 'iPhone:', thefile 18 | continue 19 | -------------------------------------------------------------------------------- /code/txtcount.py: -------------------------------------------------------------------------------- 1 | import os 2 | count = 0 3 | for dirname, dirs, files in os.walk('.'): 4 | for filename in files: 5 | if filename.endswith('.txt') : 6 | count = count + 1 7 | 8 | print 'Files:', count 9 | -------------------------------------------------------------------------------- /code/txtdelete.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | for (dirname, dirs, files) in os.walk('.'): 4 | for filename in files: 5 | if filename.endswith('.txt') : 6 | thefile = os.path.join(dirname,filename) 7 | size = os.path.getsize(thefile) 8 | if size == 2578 or size == 2565: 9 | print 'T-Mobile:',thefile 10 | os.remove(thefile) 11 | continue 12 | fhand = open(thefile,'r') 13 | lines = list() 14 | for line in fhand: 15 | lines.append(line) 16 | fhand.close() 17 | if len(lines) == 3 and lines[2].startswith('Sent from my iPhone') : 18 | print 'iPhone:', thefile 19 | os.remove(thefile) 20 | continue 21 | -------------------------------------------------------------------------------- /code/txtmd5.py: -------------------------------------------------------------------------------- 1 | import os 2 | import hashlib 3 | from os.path import join 4 | 5 | hashes = dict() 6 | for (dirname, dirs, files) in os.walk('.'): 7 | for filename in files: 8 | if filename.endswith('.txt') : 9 | thefile = 
os.path.join(dirname,filename) 10 | fhand = open(thefile,'r') 11 | data = fhand.read() 12 | fhand.close() 13 | hash = hashlib.md5(data).hexdigest() 14 | # print thefile, hash 15 | if hash in hashes: 16 | print hashes[hash], thefile 17 | else: 18 | hashes[hash] = thefile 19 | -------------------------------------------------------------------------------- /code/txtsize.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os.path import join 3 | for (dirname, dirs, files) in os.walk('.'): 4 | for filename in files: 5 | if filename.endswith('.txt') : 6 | thefile = os.path.join(dirname,filename) 7 | print os.path.getsize(thefile), thefile 8 | -------------------------------------------------------------------------------- /code/urljpeg.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import time 3 | 4 | mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 5 | mysock.connect(('www.py4inf.com', 80)) 6 | mysock.send('GET http://www.py4inf.com/cover.jpg HTTP/1.0\n\n') 7 | 8 | 9 | count = 0 10 | picture = ""; 11 | while True: 12 | data = mysock.recv(5120) 13 | if ( len(data) < 1 ) : break 14 | time.sleep(0.25) 15 | count = count + len(data) 16 | print len(data),count 17 | picture = picture + data 18 | 19 | mysock.close() 20 | 21 | # Look for the end of the header (2 CRLF) 22 | pos = picture.find("\r\n\r\n"); 23 | print 'Header length',pos 24 | print picture[:pos] 25 | 26 | # Skip past the header and save the picture data 27 | picture = picture[pos+4:] 28 | fhand = open("stuff.jpg","wb") 29 | fhand.write(picture); 30 | fhand.close() 31 | -------------------------------------------------------------------------------- /code/urllib1.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | fhand = urllib.urlopen('http://www.py4inf.com/code/romeo.txt') 4 | for line in fhand: 5 | print line.strip() 6 | 7 | 
-------------------------------------------------------------------------------- /code/urllib2.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | fhand = urllib.urlopen('http://www.dr-chuck.com/page1.htm') 4 | for line in fhand: 5 | print line.strip() -------------------------------------------------------------------------------- /code/urllink2.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | from BeautifulSoup import * 3 | 4 | url = raw_input('Enter - ') 5 | html = urllib.urlopen(url).read() 6 | 7 | soup = BeautifulSoup(html) 8 | 9 | # Retrieve all of the anchor tags 10 | tags = soup('a') 11 | for tag in tags: 12 | # Look at the parts of a tag 13 | print 'TAG:',tag 14 | print 'URL:',tag.get('href', None) 15 | print 'Contents:',tag.contents[0] 16 | print 'Attrs:',tag.attrs 17 | -------------------------------------------------------------------------------- /code/urllink3.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | from BeautifulSoup import * 3 | 4 | todo = list() 5 | visited = list() 6 | url = raw_input('Enter - ') 7 | todo.append(url) 8 | 9 | while len(todo) > 0 : 10 | print "====== Todo list count is ",len(todo) 11 | url = todo.pop() 12 | 13 | if ( not url.startswith('http') ) : 14 | print "Skipping", url 15 | continue 16 | 17 | if ( url.find('facebook') > 0 ) : 18 | continue 19 | 20 | if ( url in visited ) : 21 | print "Visited", url 22 | continue 23 | 24 | print "===== Retrieving ", url 25 | 26 | html = urllib.urlopen(url).read() 27 | soup = BeautifulSoup(html) 28 | visited.append(url) 29 | 30 | # Retrieve all of the anchor tags 31 | tags = soup('a') 32 | for tag in tags: 33 | newurl = tag.get('href', None) 34 | if ( newurl != None ) : 35 | todo.append(newurl) 36 | 37 | -------------------------------------------------------------------------------- /code/urllinks.py: 
-------------------------------------------------------------------------------- 1 | import urllib 2 | from BeautifulSoup import * 3 | 4 | url = raw_input('Enter - ') 5 | html = urllib.urlopen(url).read() 6 | soup = BeautifulSoup(html) 7 | 8 | # Retrieve all of the anchor tags 9 | tags = soup('a') 10 | for tag in tags: 11 | print tag.get('href', None) 12 | -------------------------------------------------------------------------------- /code/urlregex.py: -------------------------------------------------------------------------------- 1 | # Search for lines that start with From and have an at sign 2 | import urllib 3 | import re 4 | 5 | url = raw_input('Enter - ') 6 | html = urllib.urlopen(url).read() 7 | links = re.findall('href="(http://.*?)"', html) 8 | for link in links: 9 | print link 10 | 11 | -------------------------------------------------------------------------------- /code/urlwords.py: -------------------------------------------------------------------------------- 1 | import urllib 2 | 3 | counts = dict() 4 | fhand = urllib.urlopen('http://www.py4inf.com/code/romeo.txt') 5 | for line in fhand: 6 | words = line.split() 7 | for word in words: 8 | counts[word] = counts.get(word,0) + 1 9 | print counts 10 | -------------------------------------------------------------------------------- /code/whathour.py: -------------------------------------------------------------------------------- 1 | fname = raw_input('Enter file name: ') 2 | fhand = open(fname) 3 | c = dict() 4 | for line in fhand: 5 | if not line.startswith('From ') : continue 6 | pieces = line.split() 7 | time = pieces[5] 8 | parts = time.split(':') 9 | hour = parts[0] 10 | c[hour] = c.get(hour,0) + 1 11 | 12 | lst = list() 13 | for key in c: 14 | value = c[key] 15 | lst.append( (value, key) ) 16 | 17 | lst.sort() 18 | 19 | for value, key in lst: 20 | print key, value 21 | 22 | 23 | -------------------------------------------------------------------------------- /code/wikidata.db: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/code/wikidata.db -------------------------------------------------------------------------------- /code/wikigrade.py: -------------------------------------------------------------------------------- 1 | import string 2 | import sqlite3 3 | import urllib 4 | import xml.etree.ElementTree as ET 5 | from BeautifulSoup import * 6 | 7 | conn = sqlite3.connect('wikidata.db') 8 | cur = conn.cursor() 9 | 10 | cur.execute(''' 11 | CREATE TABLE IF NOT EXISTS TinyTable (id INTEGER PRIMARY KEY, 12 | url TEXT, page BLOB, retrieved_at timestamp)''') 13 | 14 | # A slightly extended dictionary 15 | class sash(dict): 16 | def sortvalues(self,reverse=True): 17 | return sorted(self.items(),key=lambda x: (x[1], x[0]), reverse=reverse) 18 | 19 | def tinyTable(url): 20 | global cur,conn 21 | cur.execute('SELECT id,page,retrieved_at FROM TinyTable WHERE URL = ?', (url, )) 22 | try: 23 | row = cur.fetchone() 24 | print 'DATE',row[2] 25 | return row[1] 26 | except: 27 | row = None 28 | print 'Retrieving', url 29 | 30 | data = urllib.urlopen (url).read() 31 | if row != None: 32 | cur.execute("UPDATE TinyTable SET page=?,retrieved_at=datetime('now') WHERE id=?", (unicode(data, 'utf-8'), row[0])) 33 | else: 34 | cur.execute("INSERT INTO TinyTable (url, page, retrieved_at) VALUES (?, ?, datetime('now'))",(url, unicode(data, 'utf-8'))) 35 | conn.commit() 36 | return data 37 | 38 | cururl = 'https://ctools.umich.edu/portal/tool/27500dea-c105-4f7b-a195-3c89536a64b7?pageName=%2Fsite%2Ff57681b8-6db9-46cf-aad1-3a0bdd621138%2Fhome&action=view&panel=Main&realm=%2Fsite%2Ff57681b8-6db9-46cf-aad1-3a0bdd621138' 39 | prefix = 'https://ctools.umich.edu/portal/tool/27500dea-c105-4f7b-a195-3c89536a64b7' 40 | 41 | urls = list() 42 | urls.append(cururl) 43 | visited = list() 44 | editcounts = sash() 45 | postcounts = sash() 46 | 47 | while 
len(urls) > 0 : 48 | print '=== URLS Yet To Retrieve:',len(urls) 49 | cururl = urls.pop() 50 | if cururl in visited: continue 51 | print 'RETRIEVING',cururl 52 | data = tinyTable(cururl) 53 | visited.append(cururl) 54 | soup = BeautifulSoup(data) 55 | tags = soup('a') 56 | # print 'Tags' 57 | for tag in tags: 58 | print tag 59 | url = tag.get('href',None) 60 | if url == None : continue 61 | # Don't follow absolute urls 62 | if not url.startswith(prefix) : continue 63 | newurl = urllib.basejoin(cururl,url) 64 | if newurl in visited : continue 65 | # print 'APPENDING',newurl 66 | if newurl.find('action=view') > 0 or newurl.find('action=history') > 0 : 67 | urls.append(newurl) 68 | 69 | print 'EDITS:' 70 | for (key,val) in editcounts.sortvalues(): 71 | print key, val 72 | 73 | for (key,val) in sorted(postcounts.items()): 74 | print key, val 75 | 76 | conn.close() 77 | -------------------------------------------------------------------------------- /code/wordlist.py: -------------------------------------------------------------------------------- 1 | name = raw_input('Enter file: ') 2 | handle = open(name, 'r') 3 | wordlist = list() 4 | for line in handle: 5 | words = line.split() 6 | for word in words: 7 | if word in wordlist: continue 8 | wordlist.append(word) 9 | 10 | wordlist.sort() 11 | print wordlist 12 | -------------------------------------------------------------------------------- /code/words.py: -------------------------------------------------------------------------------- 1 | name = raw_input('Enter file:') 2 | handle = open(name, 'r') 3 | text = handle.read() 4 | words = text.split() 5 | counts = dict() 6 | for word in words: 7 | counts[word] = counts.get(word,0) + 1 8 | 9 | bigcount = None 10 | bigword = None 11 | for word,count in counts.items(): 12 | if bigcount == None or count > bigcount: 13 | bigword = word 14 | bigcount = count 15 | 16 | print bigword, bigcount 17 | -------------------------------------------------------------------------------- 
/code/words.txt: -------------------------------------------------------------------------------- 1 | Writing programs or programming is a very creative 2 | and rewarding activity You can write programs for 3 | many reasons ranging from making your living to solving 4 | a difficult data analysis problem to having fun to helping 5 | someone else solve a problem This book assumes that 6 | {\em everyone} needs to know how to program and that once 7 | you know how to program, you will figure out what you want 8 | to do with your newfound skills 9 | 10 | We are surrounded in our daily lives with computers ranging 11 | from laptops to cell phones We can think of these computers 12 | as our personal assistants who can take care of many things 13 | on our behalf The hardware in our current-day computers 14 | is essentially built to continuously as us the question 15 | What would you like me to do next 16 | 17 | Our computers are fast and have vasts amounts of memory and 18 | could be very helpful to us if we only knew the language to 19 | speak to explain to the computer what we would like it to 20 | do next If we knew this language we could tell the 21 | computer to do tasks on our behalf that were reptitive 22 | Interestingly, the kinds of things computers can do best 23 | are often the kinds of things that we humans find boring 24 | and mind-numbing 25 | -------------------------------------------------------------------------------- /code/xml1.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | 3 | data = ''' 4 | 5 | Chuck 6 | 7 | +1 734 303 4456 8 | 9 | 10 | ''' 11 | 12 | tree = ET.fromstring(data) 13 | print 'Name:',tree.find('name').text 14 | print 'Attr:',tree.find('email').get('hide') 15 | -------------------------------------------------------------------------------- /code/xml2.py: -------------------------------------------------------------------------------- 1 | import 
xml.etree.ElementTree as ET 2 | 3 | input = ''' 4 | 5 | 6 | 7 | 001 8 | Chuck 9 | 10 | 11 | 009 12 | Brent 13 | 14 | 15 | ''' 16 | 17 | stuff = ET.fromstring(input) 18 | lst = stuff.findall('users/user') 19 | print 'User count:', len(lst) 20 | 21 | for item in lst: 22 | print 'Name', item.find('name').text 23 | print 'Id', item.find('id').text 24 | print 'Attribute', item.get("x") 25 | 26 | -------------------------------------------------------------------------------- /cover/JavaForumFragmentWhite.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/JavaForumFragmentWhite.jpg -------------------------------------------------------------------------------- /cover/backtext.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/backtext.docx -------------------------------------------------------------------------------- /cover/backtext_ko.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/backtext_ko.docx -------------------------------------------------------------------------------- /cover/cover.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 2 5 0 1 0 -1 50 -1 -1 0.000 0 0 -1 0 0 5 11 | 0 JavaForumFragmentWhite.jpg 12 | 0 0 2925 0 2925 2743 0 2743 0 0 13 | 2 5 0 1 0 -1 50 -1 -1 0.000 0 0 -1 0 0 5 14 | 0 greedyalloc.jpg 15 | 300 2775 5034 2775 5034 6375 300 6375 300 2775 16 | 2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 17 | 2550 2100 4650 2100 4650 3375 2550 3375 2550 2100 18 | 4 0 0 50 -1 0 12 0.0000 4 150 1650 2625 3150 
Charles Severance\001 19 | 4 0 0 50 -1 0 11 0.0000 4 180 1140 2625 2700 Exploring Data\001 20 | 4 0 0 50 -1 -1 12 0.0000 4 195 1920 2625 2475 Python for Informatics\001 21 | -------------------------------------------------------------------------------- /cover/cover_um_lib_003_front.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/cover_um_lib_003_front.jpg -------------------------------------------------------------------------------- /cover/cover_um_lib_003_full.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/cover_um_lib_003_full.jpg -------------------------------------------------------------------------------- /cover/cover_um_lib_003_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/cover_um_lib_003_small.jpg -------------------------------------------------------------------------------- /cover/epub_cover.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/epub_cover.jpg -------------------------------------------------------------------------------- /cover/greedyalloc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/greedyalloc.jpg -------------------------------------------------------------------------------- /cover/isbn.txt: -------------------------------------------------------------------------------- 1 | ISBN-13: 978-1492339243 2 | 3 | ISBN-10: 1492339245 4 | Your book has been assigned a CreateSpace ISBN. 
5 | 6 | 7 | -------------------------------------------------------------------------------- /cover/politicalblogs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/cover/politicalblogs.jpg -------------------------------------------------------------------------------- /createspace/BookCoverPreview.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/createspace/BookCoverPreview.jpeg -------------------------------------------------------------------------------- /createspace/BookCoverPreviewFront.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/createspace/BookCoverPreviewFront.jpg -------------------------------------------------------------------------------- /createspace/Python para informaticos_ Explo - Charles Severance.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/createspace/Python para informaticos_ Explo - Charles Severance.epub -------------------------------------------------------------------------------- /createspace/book-2017-05-15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/createspace/book-2017-05-15.pdf -------------------------------------------------------------------------------- /createspace/book_272_es.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/createspace/book_272_es.pdf 
-------------------------------------------------------------------------------- /createspace/book_272_es2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/createspace/book_272_es2.pdf -------------------------------------------------------------------------------- /createspace/book_272_es3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/createspace/book_272_es3.pdf -------------------------------------------------------------------------------- /figs2/arch.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4275 1200 106 106 4275 1200 4381 1200 11 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4575 900 106 106 4575 900 4681 900 12 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4425 1050 106 106 4425 1050 4531 1050 13 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4725 869 106 106 4725 869 4831 869 14 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4835 727 106 106 4835 727 4941 727 15 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 5025 644 106 106 5025 644 5131 644 16 | # Words 17 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 18 | 2775 3000 4500 3000 4500 4275 2775 4275 2775 3000 19 | # Words 20 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 21 | 5025 3000 6750 3000 6750 4275 5025 4275 5025 3000 22 | # Words 23 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 24 | 6000 1050 7725 1050 7725 2325 6000 2325 6000 1050 25 | # Words 26 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 27 | 525 1050 2250 1050 2250 2325 525 2325 525 1050 28 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 29 | 2250 1725 2775 1725 30 | # Words 31 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 32 | 2550 525 4725 525 4725 4500 
2550 4500 2550 525 33 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 34 | 4500 3675 5025 3675 35 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 36 | 4500 1725 6000 1725 37 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 38 | 3600 2325 3600 3000 39 | # Words 40 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 41 | 2775 1050 4500 1050 4500 2325 2775 2325 2775 1050 42 | 2 4 0 3 0 7 50 -1 -1 0.000 0 0 5 0 0 5 43 | 5850 857 5141 857 5141 300 5850 300 5850 857 44 | 4 0 0 50 -1 0 12 0.0000 4 150 645 3075 1275 Central\001 45 | 4 0 0 50 -1 0 12 0.0000 4 195 945 3075 1530 Processing\001 46 | 4 0 0 50 -1 0 12 0.0000 4 150 360 3075 1785 Unit\001 47 | 4 0 0 50 -1 0 12 0.0000 4 150 450 3150 3300 Main\001 48 | 4 0 0 50 -1 0 12 0.0000 4 195 750 3150 3555 Memory\001 49 | 4 0 0 50 -1 0 12 0.0000 4 195 930 5250 3450 Secondary\001 50 | 4 0 0 50 -1 0 12 0.0000 4 195 750 5250 3705 Memory\001 51 | 4 0 0 50 -1 0 12 0.0000 4 150 780 6375 1650 Network\001 52 | 4 0 0 50 -1 0 12 0.0000 4 195 450 825 1500 Input\001 53 | 4 0 0 50 -1 0 12 0.0000 4 150 795 2775 825 Software\001 54 | 4 0 0 50 -1 0 12 0.0000 4 195 585 825 1725 Output\001 55 | 4 0 0 50 -1 0 12 0.0000 4 150 705 825 1950 Devices\001 56 | 4 0 0 50 -1 0 12 0.0000 4 150 465 5250 525 What\001 57 | 4 0 0 50 -1 0 12 0.0000 4 150 525 5250 750 Next?\001 58 | -------------------------------------------------------------------------------- /figs2/arch2.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4275 1200 106 106 4275 1200 4381 1200 11 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4575 900 106 106 4575 900 4681 900 12 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4425 1050 106 106 4425 1050 4531 1050 13 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4725 869 106 106 4725 869 4831 869 14 | 1 3 0 3 0 7 49 -1 20 0.000 1 0.0000 4835 727 106 106 4835 727 4941 727 15 | 1 3 0 3 0 7 49 -1 
20 0.000 1 0.0000 5025 644 106 106 5025 644 5131 644 16 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 4125 3600 144 144 4125 3600 4269 3600 17 | # Words 18 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 19 | 2775 3000 4500 3000 4500 4275 2775 4275 2775 3000 20 | # Words 21 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 22 | 5025 3000 6750 3000 6750 4275 5025 4275 5025 3000 23 | # Words 24 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 25 | 6000 1050 7725 1050 7725 2325 6000 2325 6000 1050 26 | # Words 27 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 28 | 525 1050 2250 1050 2250 2325 525 2325 525 1050 29 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 30 | 2250 1725 2775 1725 31 | # Words 32 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 33 | 2550 525 4725 525 4725 4500 2550 4500 2550 525 34 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 35 | 4500 3675 5025 3675 36 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 37 | 4500 1725 6000 1725 38 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 39 | 3600 2325 3600 3000 40 | # Words 41 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 42 | 2775 1050 4500 1050 4500 2325 2775 2325 2775 1050 43 | 2 4 0 3 0 7 50 -1 -1 0.000 0 0 5 0 0 5 44 | 5850 857 5141 857 5141 300 5850 300 5850 857 45 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 46 | 4125 3675 4125 3975 47 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 3 48 | 4125 3975 4050 4050 4050 4125 49 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 3 50 | 4125 3975 4200 4050 4200 4125 51 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 52 | 4125 3900 3975 3825 53 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 54 | 4125 3900 4275 3825 55 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 56 | 2 1 1.00 120.00 120.00 57 | 3525 3900 3825 3900 58 | 4 0 0 50 -1 0 12 0.0000 4 150 645 3075 1275 Central\001 59 | 4 0 0 50 -1 0 12 0.0000 4 195 945 3075 1530 Processing\001 60 | 4 0 0 50 -1 0 12 0.0000 4 150 360 3075 1785 Unit\001 61 | 4 0 0 50 -1 0 12 0.0000 4 150 450 3150 3300 Main\001 62 | 4 0 0 50 -1 0 12 0.0000 4 195 750 3150 3555 Memory\001 63 | 4 0 0 50 -1 0 12 0.0000 4 195 930 5250 3450 
Secondary\001 64 | 4 0 0 50 -1 0 12 0.0000 4 195 750 5250 3705 Memory\001 65 | 4 0 0 50 -1 0 12 0.0000 4 150 780 6375 1650 Network\001 66 | 4 0 0 50 -1 0 12 0.0000 4 195 450 825 1500 Input\001 67 | 4 0 0 50 -1 0 12 0.0000 4 150 795 2775 825 Software\001 68 | 4 0 0 50 -1 0 12 0.0000 4 195 585 825 1725 Output\001 69 | 4 0 0 50 -1 0 12 0.0000 4 150 705 825 1950 Devices\001 70 | 4 0 0 50 -1 0 12 0.0000 4 150 465 5250 525 What\001 71 | 4 0 0 50 -1 0 12 0.0000 4 150 525 5250 750 Next?\001 72 | 4 0 0 50 -1 0 12 0.0000 4 150 345 3075 3975 You\001 73 | -------------------------------------------------------------------------------- /figs2/arch3.eps: -------------------------------------------------------------------------------- 1 | %!PS-Adobe-2.0 EPSF-2.0 2 | %%Title: arch3.fig 3 | %%Creator: fig2dev Version 3.2 Patchlevel 5 4 | %%CreationDate: Tue Dec 22 22:04:12 2009 5 | %%For: csev@Macintosh.local (Chuck) 6 | %%BoundingBox: 0 0 436 243 7 | %Magnification: 1.0000 8 | %%EndComments 9 | /$F2psDict 200 dict def 10 | $F2psDict begin 11 | $F2psDict /mtrx matrix put 12 | /col-1 {0 setgray} bind def 13 | /col0 {0.000 0.000 0.000 srgb} bind def 14 | /col1 {0.000 0.000 1.000 srgb} bind def 15 | /col2 {0.000 1.000 0.000 srgb} bind def 16 | /col3 {0.000 1.000 1.000 srgb} bind def 17 | /col4 {1.000 0.000 0.000 srgb} bind def 18 | /col5 {1.000 0.000 1.000 srgb} bind def 19 | /col6 {1.000 1.000 0.000 srgb} bind def 20 | /col7 {1.000 1.000 1.000 srgb} bind def 21 | /col8 {0.000 0.000 0.560 srgb} bind def 22 | /col9 {0.000 0.000 0.690 srgb} bind def 23 | /col10 {0.000 0.000 0.820 srgb} bind def 24 | /col11 {0.530 0.810 1.000 srgb} bind def 25 | /col12 {0.000 0.560 0.000 srgb} bind def 26 | /col13 {0.000 0.690 0.000 srgb} bind def 27 | /col14 {0.000 0.820 0.000 srgb} bind def 28 | /col15 {0.000 0.560 0.560 srgb} bind def 29 | /col16 {0.000 0.690 0.690 srgb} bind def 30 | /col17 {0.000 0.820 0.820 srgb} bind def 31 | /col18 {0.560 0.000 0.000 srgb} bind def 32 | /col19 {0.690 0.000 0.000 
srgb} bind def 33 | /col20 {0.820 0.000 0.000 srgb} bind def 34 | /col21 {0.560 0.000 0.560 srgb} bind def 35 | /col22 {0.690 0.000 0.690 srgb} bind def 36 | /col23 {0.820 0.000 0.820 srgb} bind def 37 | /col24 {0.500 0.190 0.000 srgb} bind def 38 | /col25 {0.630 0.250 0.000 srgb} bind def 39 | /col26 {0.750 0.380 0.000 srgb} bind def 40 | /col27 {1.000 0.500 0.500 srgb} bind def 41 | /col28 {1.000 0.630 0.630 srgb} bind def 42 | /col29 {1.000 0.750 0.750 srgb} bind def 43 | /col30 {1.000 0.880 0.880 srgb} bind def 44 | /col31 {1.000 0.840 0.000 srgb} bind def 45 | 46 | end 47 | save 48 | newpath 0 243 moveto 0 0 lineto 436 0 lineto 436 243 lineto closepath clip newpath 49 | -29.5 272.0 translate 50 | 1 -1 scale 51 | 52 | /cp {closepath} bind def 53 | /ef {eofill} bind def 54 | /gr {grestore} bind def 55 | /gs {gsave} bind def 56 | /sa {save} bind def 57 | /rs {restore} bind def 58 | /l {lineto} bind def 59 | /m {moveto} bind def 60 | /rm {rmoveto} bind def 61 | /n {newpath} bind def 62 | /s {stroke} bind def 63 | /sh {show} bind def 64 | /slc {setlinecap} bind def 65 | /slj {setlinejoin} bind def 66 | /slw {setlinewidth} bind def 67 | /srgb {setrgbcolor} bind def 68 | /rot {rotate} bind def 69 | /sc {scale} bind def 70 | /sd {setdash} bind def 71 | /ff {findfont} bind def 72 | /sf {setfont} bind def 73 | /scf {scalefont} bind def 74 | /sw {stringwidth} bind def 75 | /tr {translate} bind def 76 | /tnt {dup dup currentrgbcolor 77 | 4 -2 roll dup 1 exch sub 3 -1 roll mul add 78 | 4 -2 roll dup 1 exch sub 3 -1 roll mul add 79 | 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} 80 | bind def 81 | /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul 82 | 4 -2 roll mul srgb} bind def 83 | /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def 84 | /$F2psEnd {$F2psEnteredState restore end} def 85 | 86 | $F2psBegin 87 | 10 setmiterlimit 88 | 0 slj 0 slc 89 | 0.06000 0.06000 sc 90 | % 91 | % Fig objects follow 92 | % 93 | % 94 | % here starts figure with depth 50 
95 | /Times-Roman ff 200.00 scf sf 96 | 3075 1800 m 97 | gs 1 -1 sc (Unit) col0 sh gr 98 | % Words 99 | % Polyline 100 | 0 slj 101 | 0 slc 102 | 30.000 slw 103 | n 5025 3000 m 6750 3000 l 6750 4275 l 5025 4275 l 104 | cp gs col0 s gr 105 | % Words 106 | % Polyline 107 | n 6000 1050 m 7725 1050 l 7725 2325 l 6000 2325 l 108 | cp gs col0 s gr 109 | % Words 110 | % Polyline 111 | n 525 1050 m 2250 1050 l 2250 2325 l 525 2325 l 112 | cp gs col0 s gr 113 | % Polyline 114 | n 2250 1725 m 115 | 2775 1725 l gs col0 s gr 116 | % Words 117 | % Polyline 118 | n 2550 525 m 4725 525 l 4725 4500 l 2550 4500 l 119 | cp gs col0 s gr 120 | % Polyline 121 | n 4500 3675 m 122 | 5025 3675 l gs col0 s gr 123 | % Polyline 124 | n 4500 1725 m 125 | 6000 1725 l gs col0 s gr 126 | % Polyline 127 | n 3600 2325 m 128 | 3600 3000 l gs col0 s gr 129 | % Words 130 | % Polyline 131 | n 2775 1050 m 4500 1050 l 4500 2325 l 2775 2325 l 132 | cp gs col0 s gr 133 | /Times-Roman ff 200.00 scf sf 134 | 3150 3300 m 135 | gs 1 -1 sc (Main) col0 sh gr 136 | /Times-Roman ff 200.00 scf sf 137 | 3150 3555 m 138 | gs 1 -1 sc (Memory) col0 sh gr 139 | /Times-Roman ff 200.00 scf sf 140 | 5250 3450 m 141 | gs 1 -1 sc (Secondary) col0 sh gr 142 | /Times-Roman ff 200.00 scf sf 143 | 5250 3705 m 144 | gs 1 -1 sc (Memory) col0 sh gr 145 | /Times-Roman ff 200.00 scf sf 146 | 6375 1650 m 147 | gs 1 -1 sc (Network) col0 sh gr 148 | /Times-Roman ff 200.00 scf sf 149 | 825 1500 m 150 | gs 1 -1 sc (Input) col0 sh gr 151 | /Times-Roman ff 200.00 scf sf 152 | 2775 825 m 153 | gs 1 -1 sc (Software) col0 sh gr 154 | /Times-Roman ff 200.00 scf sf 155 | 825 1725 m 156 | gs 1 -1 sc (Output) col0 sh gr 157 | /Times-Roman ff 200.00 scf sf 158 | 825 1950 m 159 | gs 1 -1 sc (Devices) col0 sh gr 160 | /Times-Roman ff 200.00 scf sf 161 | 3075 1350 m 162 | gs 1 -1 sc (Central) col0 sh gr 163 | /Times-Roman ff 200.00 scf sf 164 | 3075 1575 m 165 | gs 1 -1 sc (Processing) col0 sh gr 166 | % Words 167 | % Polyline 168 | n 2775 3000 m 4500 
3000 l 4500 4275 l 2775 4275 l 169 | cp gs col0 s gr 170 | % here ends figure; 171 | $F2psEnd 172 | rs 173 | showpage 174 | %%Trailer 175 | %EOF 176 | -------------------------------------------------------------------------------- /figs2/arch3.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | # Words 11 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 12 | 2775 3000 4500 3000 4500 4275 2775 4275 2775 3000 13 | # Words 14 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 15 | 5025 3000 6750 3000 6750 4275 5025 4275 5025 3000 16 | # Words 17 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 18 | 6000 1050 7725 1050 7725 2325 6000 2325 6000 1050 19 | # Words 20 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 21 | 525 1050 2250 1050 2250 2325 525 2325 525 1050 22 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 23 | 2250 1725 2775 1725 24 | # Words 25 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 26 | 2550 525 4725 525 4725 4500 2550 4500 2550 525 27 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 28 | 4500 3675 5025 3675 29 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 30 | 4500 1725 6000 1725 31 | 2 1 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 32 | 3600 2325 3600 3000 33 | # Words 34 | 2 2 0 3 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 35 | 2775 1050 4500 1050 4500 2325 2775 2325 2775 1050 36 | 4 0 0 50 -1 0 12 0.0000 4 150 450 3150 3300 Main\001 37 | 4 0 0 50 -1 0 12 0.0000 4 195 750 3150 3555 Memory\001 38 | 4 0 0 50 -1 0 12 0.0000 4 195 930 5250 3450 Secondary\001 39 | 4 0 0 50 -1 0 12 0.0000 4 195 750 5250 3705 Memory\001 40 | 4 0 0 50 -1 0 12 0.0000 4 150 780 6375 1650 Network\001 41 | 4 0 0 50 -1 0 12 0.0000 4 195 450 825 1500 Input\001 42 | 4 0 0 50 -1 0 12 0.0000 4 150 795 2775 825 Software\001 43 | 4 0 0 50 -1 0 12 0.0000 4 195 585 825 1725 Output\001 44 | 4 0 0 50 -1 0 12 0.0000 4 150 705 825 1950 Devices\001 45 | 4 0 0 50 -1 0 12 0.0000 4 150 645 3075 
1350 Central\001 46 | 4 0 0 50 -1 0 12 0.0000 4 195 945 3075 1575 Processing\001 47 | 4 0 0 50 -1 0 12 0.0000 4 150 360 3075 1800 Unit\001 48 | -------------------------------------------------------------------------------- /figs2/by-sa.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 2 5 0 1 0 -1 50 -1 -1 0.000 0 0 -1 0 0 5 11 | 0 by-sa.png 12 | 0 0 6045 0 6045 2115 0 2115 0 0 13 | -------------------------------------------------------------------------------- /figs2/by-sa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/figs2/by-sa.png -------------------------------------------------------------------------------- /figs2/google-map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/figs2/google-map.png -------------------------------------------------------------------------------- /figs2/if.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 19 | 21 | 28 | 33 | 34 | 41 | 42 | 60 | 62 | 63 | 65 | image/svg+xml 66 | 68 | 69 | 70 | 71 | 72 | 77 | 85 | 89 | x > 0 99 | 106 | print 'x is positive' 116 | 120 | 124 | 128 | yes 138 | no 148 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /figs2/list1.eps: -------------------------------------------------------------------------------- 1 | %!PS-Adobe-2.0 EPSF-2.0 2 | %%Title: list1.fig 3 | %%Creator: fig2dev Version 3.2 Patchlevel 5-alpha7 4 | %%CreationDate: Thu Jan 3 09:32:30 2008 5 | %%For: downey@rocky (Allen Downey,,,) 6 | %%BoundingBox: 0 0 218 42 7 | %Magnification: 1.0000 8 | %%EndComments 9 | 
/$F2psDict 200 dict def 10 | $F2psDict begin 11 | $F2psDict /mtrx matrix put 12 | /col-1 {0 setgray} bind def 13 | /col0 {0.000 0.000 0.000 srgb} bind def 14 | /col1 {0.000 0.000 1.000 srgb} bind def 15 | /col2 {0.000 1.000 0.000 srgb} bind def 16 | /col3 {0.000 1.000 1.000 srgb} bind def 17 | /col4 {1.000 0.000 0.000 srgb} bind def 18 | /col5 {1.000 0.000 1.000 srgb} bind def 19 | /col6 {1.000 1.000 0.000 srgb} bind def 20 | /col7 {1.000 1.000 1.000 srgb} bind def 21 | /col8 {0.000 0.000 0.560 srgb} bind def 22 | /col9 {0.000 0.000 0.690 srgb} bind def 23 | /col10 {0.000 0.000 0.820 srgb} bind def 24 | /col11 {0.530 0.810 1.000 srgb} bind def 25 | /col12 {0.000 0.560 0.000 srgb} bind def 26 | /col13 {0.000 0.690 0.000 srgb} bind def 27 | /col14 {0.000 0.820 0.000 srgb} bind def 28 | /col15 {0.000 0.560 0.560 srgb} bind def 29 | /col16 {0.000 0.690 0.690 srgb} bind def 30 | /col17 {0.000 0.820 0.820 srgb} bind def 31 | /col18 {0.560 0.000 0.000 srgb} bind def 32 | /col19 {0.690 0.000 0.000 srgb} bind def 33 | /col20 {0.820 0.000 0.000 srgb} bind def 34 | /col21 {0.560 0.000 0.560 srgb} bind def 35 | /col22 {0.690 0.000 0.690 srgb} bind def 36 | /col23 {0.820 0.000 0.820 srgb} bind def 37 | /col24 {0.500 0.190 0.000 srgb} bind def 38 | /col25 {0.630 0.250 0.000 srgb} bind def 39 | /col26 {0.750 0.380 0.000 srgb} bind def 40 | /col27 {1.000 0.500 0.500 srgb} bind def 41 | /col28 {1.000 0.630 0.630 srgb} bind def 42 | /col29 {1.000 0.750 0.750 srgb} bind def 43 | /col30 {1.000 0.880 0.880 srgb} bind def 44 | /col31 {1.000 0.840 0.000 srgb} bind def 45 | 46 | end 47 | save 48 | newpath 0 42 moveto 0 0 lineto 218 0 lineto 218 42 lineto closepath clip newpath 49 | -107.3 99.7 translate 50 | 1 -1 scale 51 | 52 | /cp {closepath} bind def 53 | /ef {eofill} bind def 54 | /gr {grestore} bind def 55 | /gs {gsave} bind def 56 | /sa {save} bind def 57 | /rs {restore} bind def 58 | /l {lineto} bind def 59 | /m {moveto} bind def 60 | /rm {rmoveto} bind def 61 | /n {newpath} bind 
def 62 | /s {stroke} bind def 63 | /sh {show} bind def 64 | /slc {setlinecap} bind def 65 | /slj {setlinejoin} bind def 66 | /slw {setlinewidth} bind def 67 | /srgb {setrgbcolor} bind def 68 | /rot {rotate} bind def 69 | /sc {scale} bind def 70 | /sd {setdash} bind def 71 | /ff {findfont} bind def 72 | /sf {setfont} bind def 73 | /scf {scalefont} bind def 74 | /sw {stringwidth} bind def 75 | /tr {translate} bind def 76 | /tnt {dup dup currentrgbcolor 77 | 4 -2 roll dup 1 exch sub 3 -1 roll mul add 78 | 4 -2 roll dup 1 exch sub 3 -1 roll mul add 79 | 4 -2 roll dup 1 exch sub 3 -1 roll mul add srgb} 80 | bind def 81 | /shd {dup dup currentrgbcolor 4 -2 roll mul 4 -2 roll mul 82 | 4 -2 roll mul srgb} bind def 83 | /$F2psBegin {$F2psDict begin /$F2psEnteredState save def} def 84 | /$F2psEnd {$F2psEnteredState restore end} def 85 | 86 | $F2psBegin 87 | 10 setmiterlimit 88 | 0 slj 0 slc 89 | 0.06000 0.06000 sc 90 | % 91 | % Fig objects follow 92 | % 93 | % 94 | % here starts figure with depth 51 95 | % Polyline 96 | 0 slj 97 | 0 slc 98 | 7.500 slw 99 | n 3825 975 m 5400 975 l 5400 1650 l 3825 1650 l 100 | cp gs col7 0.90 shd ef gr gs col0 s gr 101 | % Polyline 102 | n 1800 975 m 3375 975 l 3375 1650 l 1800 1650 l 103 | cp gs col7 0.90 shd ef gr gs col0 s gr 104 | % Polyline 105 | gs clippath 106 | 4375 1413 m 4522 1375 l 4507 1317 l 4360 1355 l 4360 1355 l 4484 1354 l 4375 1413 l cp 107 | eoclip 108 | n 4125 1447 m 109 | 4500 1350 l gs col0 s gr gr 110 | 111 | % arrowhead 112 | n 4375 1413 m 4484 1354 l 4360 1355 l col0 s 113 | % Polyline 114 | gs clippath 115 | 4360 1259 m 4504 1308 l 4523 1251 l 4380 1202 l 4380 1202 l 4484 1270 l 4360 1259 l cp 116 | eoclip 117 | n 4125 1147 m 118 | 4500 1275 l gs col0 s gr gr 119 | 120 | % arrowhead 121 | n 4360 1259 m 4484 1270 l 4380 1202 l col0 s 122 | % Polyline 123 | gs clippath 124 | 2338 1477 m 2490 1477 l 2490 1417 l 2338 1417 l 2338 1417 l 2458 1447 l 2338 1477 l cp 125 | eoclip 126 | n 2100 1447 m 127 | 2475 1447 l gs col0 
s gr gr 128 | 129 | % arrowhead 130 | n 2338 1477 m 2458 1447 l 2338 1417 l col0 s 131 | % Polyline 132 | gs clippath 133 | 2338 1177 m 2490 1177 l 2490 1117 l 2338 1117 l 2338 1117 l 2458 1147 l 2338 1177 l cp 134 | eoclip 135 | n 2100 1147 m 136 | 2475 1147 l gs col0 s gr gr 137 | 138 | % arrowhead 139 | n 2338 1177 m 2458 1147 l 2338 1117 l col0 s 140 | /Helvetica ff 183.33 scf sf 141 | 4050 1200 m 142 | gs 1 -1 sc (a) dup sw pop neg 0 rm col0 sh gr 143 | /Helvetica ff 183.33 scf sf 144 | 4050 1500 m 145 | gs 1 -1 sc (b) dup sw pop neg 0 rm col0 sh gr 146 | /Helvetica ff 183.33 scf sf 147 | 4575 1350 m 148 | gs 1 -1 sc ('banana') col0 sh gr 149 | /Helvetica ff 183.33 scf sf 150 | 2025 1200 m 151 | gs 1 -1 sc (a) dup sw pop neg 0 rm col0 sh gr 152 | /Helvetica ff 183.33 scf sf 153 | 2025 1500 m 154 | gs 1 -1 sc (b) dup sw pop neg 0 rm col0 sh gr 155 | /Helvetica ff 183.33 scf sf 156 | 2550 1200 m 157 | gs 1 -1 sc ('banana') col0 sh gr 158 | /Helvetica ff 183.33 scf sf 159 | 2550 1500 m 160 | gs 1 -1 sc ('banana') col0 sh gr 161 | % here ends figure; 162 | $F2psEnd 163 | rs 164 | showpage 165 | %%Trailer 166 | %EOF 167 | -------------------------------------------------------------------------------- /figs2/list1.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5-alpha5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 11 | 0 0 1.00 60.00 120.00 12 | 4125 1447 4500 1350 13 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 14 | 0 0 1.00 60.00 120.00 15 | 4125 1147 4500 1275 16 | 2 2 0 1 0 7 51 0 18 0.000 0 0 -1 0 0 5 17 | 3825 975 5400 975 5400 1650 3825 1650 3825 975 18 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 19 | 0 0 1.00 60.00 120.00 20 | 2100 1447 2475 1447 21 | 2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 22 | 0 0 1.00 60.00 120.00 23 | 2100 1147 2475 1147 24 | 2 2 0 1 0 7 51 0 18 0.000 0 0 -1 0 0 5 25 | 1800 975 
3375 975 3375 1650 1800 1650 1800 975 26 | 4 2 0 50 0 16 11 0.0000 4 105 105 4050 1200 a\001 27 | 4 2 0 50 0 16 11 0.0000 4 135 105 4050 1500 b\001 28 | 4 0 0 50 0 16 11 0.0000 4 135 720 4575 1350 'banana'\001 29 | 4 2 0 50 0 16 11 0.0000 4 105 105 2025 1200 a\001 30 | 4 2 0 50 0 16 11 0.0000 4 135 105 2025 1500 b\001 31 | 4 0 0 50 0 16 11 0.0000 4 135 720 2550 1200 'banana'\001 32 | 4 0 0 50 0 16 11 0.0000 4 135 720 2550 1500 'banana'\001 33 | -------------------------------------------------------------------------------- /figs2/mailorg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/figs2/mailorg.png -------------------------------------------------------------------------------- /figs2/pagerank.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/figs2/pagerank.png -------------------------------------------------------------------------------- /figs2/pda.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 6 2700 1800 3300 2325 11 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 12 | 3300 2325 3300 1800 2700 1800 2700 2325 3300 2325 13 | 4 0 0 50 -1 0 12 0.0000 4 150 525 2775 2280 Next?\001 14 | 4 0 0 50 -1 0 12 0.0000 4 150 465 2775 2025 What\001 15 | -6 16 | 6 2700 2550 3300 3075 17 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 18 | 3300 3075 3300 2550 2700 2550 2700 3075 3300 3075 19 | 4 0 0 50 -1 0 12 0.0000 4 150 525 2775 3030 Next?\001 20 | 4 0 0 50 -1 0 12 0.0000 4 150 465 2775 2775 What\001 21 | -6 22 | 6 3525 2550 4125 3075 23 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 24 | 4125 3075 4125 2550 3525 2550 3525 3075 4125 3075 25 | 4 0 0 50 -1 0 12 0.0000 4 150 
525 3600 3030 Next?\001 26 | 4 0 0 50 -1 0 12 0.0000 4 150 465 3600 2775 What\001 27 | -6 28 | 6 3525 1800 4125 2325 29 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 30 | 4125 2325 4125 1800 3525 1800 3525 2325 4125 2325 31 | 4 0 0 50 -1 0 12 0.0000 4 150 525 3600 2280 Next?\001 32 | 4 0 0 50 -1 0 12 0.0000 4 150 465 3600 2025 What\001 33 | -6 34 | 6 1875 1800 2475 2325 35 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 36 | 2475 2325 2475 1800 1875 1800 1875 2325 2475 2325 37 | 4 0 0 50 -1 0 12 0.0000 4 150 525 1950 2280 Next?\001 38 | 4 0 0 50 -1 0 12 0.0000 4 150 465 1950 2025 What\001 39 | -6 40 | 6 1875 2550 2475 3075 41 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 42 | 2475 3075 2475 2550 1875 2550 1875 3075 2475 3075 43 | 4 0 0 50 -1 0 12 0.0000 4 150 525 1950 3030 Next?\001 44 | 4 0 0 50 -1 0 12 0.0000 4 150 465 1950 2775 What\001 45 | -6 46 | 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4537 2400 252 252 4537 2400 4789 2400 47 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 4537 2400 75 75 4537 2400 4612 2400 48 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 12 0 0 5 49 | 4875 3225 4875 1650 1725 1650 1725 3225 4875 3225 50 | 4 0 0 50 -1 0 12 0.0000 4 150 435 4350 2925 PDA\001 51 | -------------------------------------------------------------------------------- /figs2/pda2.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5 2 | Landscape 3 | Center 4 | Inches 5 | Letter 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 1 3 0 1 0 7 50 -1 -1 0.000 1 0.0000 4537 2400 252 252 4537 2400 4789 2400 11 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 4537 2400 75 75 4537 2400 4612 2400 12 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 12 0 0 5 13 | 4875 3225 4875 1650 1725 1650 1725 3225 4875 3225 14 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 15 | 2475 2325 2475 1800 1875 1800 1875 2325 2475 2325 16 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 17 | 2475 3075 2475 2550 1875 2550 1875 3075 2475 3075 18 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 19 | 3300 2325 3300 1800 
2700 1800 2700 2325 3300 2325 20 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 21 | 4125 2325 4125 1800 3525 1800 3525 2325 4125 2325 22 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 23 | 4125 3075 4125 2550 3525 2550 3525 3075 4125 3075 24 | 2 4 0 1 0 7 50 -1 -1 0.000 0 0 8 0 0 5 25 | 3300 3075 3300 2550 2700 2550 2700 3075 3300 3075 26 | 4 0 0 50 -1 0 12 0.0000 4 150 435 4350 2925 PDA\001 27 | 4 0 0 50 -1 0 12 0.0000 4 150 375 1950 2280 Me!\001 28 | 4 0 0 50 -1 0 12 0.0000 4 150 375 1950 2025 Pick\001 29 | 4 0 0 50 -1 0 12 0.0000 4 150 375 2775 2025 Pick\001 30 | 4 0 0 50 -1 0 12 0.0000 4 150 375 3600 2025 Pick\001 31 | 4 0 0 50 -1 0 12 0.0000 4 195 345 3600 2775 Buy\001 32 | 4 0 0 50 -1 0 12 0.0000 4 150 375 2775 2775 Pick\001 33 | 4 0 0 50 -1 0 12 0.0000 4 150 375 1950 2775 Pick\001 34 | 4 0 0 50 -1 0 12 0.0000 4 150 375 1950 3030 Me!\001 35 | 4 0 0 50 -1 0 12 0.0000 4 150 375 3600 2280 Me!\001 36 | 4 0 0 50 -1 0 12 0.0000 4 195 480 3600 3030 Me :)\001 37 | 4 0 0 50 -1 0 12 0.0000 4 150 375 2775 2280 Me!\001 38 | 4 0 0 50 -1 0 12 0.0000 4 150 375 2775 3030 Me!\001 39 | -------------------------------------------------------------------------------- /figs2/sqlite.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/figs2/sqlite.png -------------------------------------------------------------------------------- /figs2/tracks.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 18 | 20 | 27 | 28 | 46 | 48 | 49 | 51 | image/svg+xml 52 | 54 | 55 | 56 | 57 | 58 | 63 | 70 | 77 | Tracks 87 | title 97 | Thunderstruck 107 | My Way 117 | 20 127 | 15 137 | 141 | 145 | plays 155 | 156 | 157 | -------------------------------------------------------------------------------- /figs2/wordcloud.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/figs2/wordcloud.png -------------------------------------------------------------------------------- /fixhtml.py: -------------------------------------------------------------------------------- 1 | import os 2 | from BeautifulSoup import * 3 | count = 0 4 | for (dirname, dirs, files) in os.walk('html'): 5 | for filename in files: 6 | if not filename.endswith('.html') : continue 7 | filename = 'html/' + filename 8 | fhand = open(filename) 9 | html = fhand.read() 10 | fhand.close() 11 | html = html.replace('``','"') 12 | html = html.replace("''",'"') 13 | soup = BeautifulSoup(html) 14 | html = str(soup) 15 | open(filename,"w").write(html) 16 | print filename, len(html) 17 | -------------------------------------------------------------------------------- /hevea.sty: -------------------------------------------------------------------------------- 1 | % hevea : hevea.sty 2 | % This is a very basic style file for latex document to be processed 3 | % with hevea. It contains definitions of LaTeX environment which are 4 | % processed in a special way by the translator. 5 | % Mostly : 6 | % - latexonly, not processed by hevea, processed by latex. 7 | % - htmlonly , the reverse. 8 | % - rawhtml, to include raw HTML in hevea output. 9 | % - toimage, to send text to the image file. 10 | % The package also provides hevea logos, html related commands (ahref 11 | % etc.), void cutting and image commands. 
12 | \NeedsTeXFormat{LaTeX2e} 13 | \ProvidesPackage{hevea}[2002/01/11] 14 | \RequirePackage{comment} 15 | \newif\ifhevea\heveafalse 16 | \@ifundefined{ifimagen}{\newif\ifimagen\imagenfalse} 17 | \makeatletter% 18 | \newcommand{\heveasmup}[2]{% 19 | \raise #1\hbox{$\m@th$% 20 | \csname S@\f@size\endcsname 21 | \fontsize\sf@size 0% 22 | \math@fontsfalse\selectfont 23 | #2% 24 | }}% 25 | \DeclareRobustCommand{\hevea}{H\kern-.15em\heveasmup{.2ex}{E}\kern-.15emV\kern-.15em\heveasmup{.2ex}{E}\kern-.15emA}% 26 | \DeclareRobustCommand{\hacha}{H\kern-.15em\heveasmup{.2ex}{A}\kern-.15emC\kern-.1em\heveasmup{.2ex}{H}\kern-.15emA}% 27 | \DeclareRobustCommand{\html}{\protect\heveasmup{0.ex}{HTML}} 28 | %%%%%%%%% Hyperlinks hevea style 29 | \newcommand{\ahref}[2]{{#2}} 30 | \newcommand{\ahrefloc}[2]{{#2}} 31 | \newcommand{\aname}[2]{{#2}} 32 | \newcommand{\ahrefurl}[1]{\texttt{#1}} 33 | \newcommand{\footahref}[2]{#2\footnote{\texttt{#1}}} 34 | \newcommand{\mailto}[1]{\texttt{#1}} 35 | \newcommand{\imgsrc}[2][]{} 36 | \newcommand{\home}[1]{\protect\raisebox{-.75ex}{\char126}#1} 37 | \AtBeginDocument 38 | {\@ifundefined{url} 39 | {%url package is not loaded 40 | \let\url\ahref\let\oneurl\ahrefurl\let\footurl\footahref} 41 | {}} 42 | %% Void cutting instructions 43 | \newcounter{cuttingdepth} 44 | \newcommand{\tocnumber}{} 45 | \newcommand{\notocnumber}{} 46 | \newcommand{\cuttingunit}{} 47 | \newcommand{\cutdef}[2][]{} 48 | \newcommand{\cuthere}[2]{} 49 | \newcommand{\cutend}{} 50 | \newcommand{\htmlhead}[1]{} 51 | \newcommand{\htmlfoot}[1]{} 52 | \newcommand{\htmlprefix}[1]{} 53 | \newenvironment{cutflow}[1]{}{} 54 | \newcommand{\cutname}[1]{} 55 | \newcommand{\toplinks}[3]{} 56 | \newcommand{\setlinkstext}[3]{} 57 | \newcommand{\flushdef}[1]{} 58 | \newcommand{\footnoteflush}[1]{} 59 | %%%% Html only 60 | \excludecomment{rawhtml} 61 | \newcommand{\rawhtmlinput}[1]{} 62 | \excludecomment{htmlonly} 63 | %%%% Latex only 64 | \newenvironment{latexonly}{}{} 65 | 
\newenvironment{verblatex}{}{} 66 | %%%% Image file stuff 67 | \def\toimage{\endgroup} 68 | \def\endtoimage{\begingroup\def\@currenvir{toimage}} 69 | \def\verbimage{\endgroup} 70 | \def\endverbimage{\begingroup\def\@currenvir{verbimage}} 71 | \newcommand{\imageflush}[1][]{} 72 | %%% Bgcolor definition 73 | \newsavebox{\@bgcolorbin} 74 | \newenvironment{bgcolor}[2][] 75 | {\newcommand{\@mycolor}{#2}\begin{lrbox}{\@bgcolorbin}\vbox\bgroup} 76 | {\egroup\end{lrbox}% 77 | \begin{flushleft}% 78 | \colorbox{\@mycolor}{\usebox{\@bgcolorbin}}% 79 | \end{flushleft}} 80 | %%% Style sheets macros, defined as no-ops 81 | \newcommand{\newstyle}[2]{} 82 | \newcommand{\addstyle}[1]{} 83 | \newcommand{\setenvclass}[2]{} 84 | \newcommand{\getenvclass}[1]{} 85 | \newcommand{\loadcssfile}[1]{} 86 | \newenvironment{divstyle}[1]{}{} 87 | \newenvironment{cellstyle}[2]{}{} 88 | \newif\ifexternalcss 89 | %%% Postlude 90 | \makeatother 91 | -------------------------------------------------------------------------------- /html-snap/cfbook001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook001.png -------------------------------------------------------------------------------- /html-snap/cfbook002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook002.png -------------------------------------------------------------------------------- /html-snap/cfbook003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook003.png -------------------------------------------------------------------------------- /html-snap/cfbook004.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook004.png -------------------------------------------------------------------------------- /html-snap/cfbook005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook005.png -------------------------------------------------------------------------------- /html-snap/cfbook006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook006.png -------------------------------------------------------------------------------- /html-snap/cfbook007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook007.png -------------------------------------------------------------------------------- /html-snap/cfbook008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook008.png -------------------------------------------------------------------------------- /html-snap/cfbook009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook009.png -------------------------------------------------------------------------------- /html-snap/cfbook010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook010.png 
-------------------------------------------------------------------------------- /html-snap/cfbook011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook011.png -------------------------------------------------------------------------------- /html-snap/cfbook012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook012.png -------------------------------------------------------------------------------- /html-snap/cfbook013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook013.png -------------------------------------------------------------------------------- /html-snap/cfbook014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook014.png -------------------------------------------------------------------------------- /html-snap/cfbook015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook015.png -------------------------------------------------------------------------------- /html-snap/cfbook016.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook016.png -------------------------------------------------------------------------------- /html-snap/cfbook017.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook017.png -------------------------------------------------------------------------------- /html-snap/cfbook018.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | Python Programming on Windows 9 | 10 | 11 | 12 | Previous 13 | Up 14 | Next 15 |
16 |

Appendix A  Python Programming on Windows

17 | In this appendix, we walk through a series of steps 18 | so you can run Python on Windows. There are many different 19 | approaches you can take, and this is just one 20 | approach to keep things simple.
21 |
22 | First, you need to install a programmer's editor. You 23 | do not want to use Notepad or Microsoft Word to edit 24 | Python programs. Programs must be in "flat-text" files 25 | and so you need an editor that is good at 26 | editing text files.
27 |
28 | Our recommended editor for Windows is NotePad++ which 29 | can be downloaded and installed from:
30 |
31 | http://sourceforge.net/projects/notepad-plus/files/
32 |
33 | Then download a recent version of Python 2 from the 34 | www.python.org web site.
35 |
36 | http://www.python.org/download/releases/2.7.5/
37 |
38 | Once you have installed Python, you should have a new 39 | folder on your computer like C:\Python27.
40 |
41 | To create a Python program, run NotePad++ from the Start Menu 42 | and save the file with a suffix of ".py". For this 43 | exercise, put a folder on your Desktop named 44 | py4inf. It is best to keep your folder names short 45 | and not to have any spaces in your folder or file name.
46 |
47 | Let's make our first Python program be: 48 |

 49 | print 'Hello Chuck'
 50 | 
Except that you should change it to be your name. Let's 51 | save the file into Desktop\py4inf\prog1.py.
52 |
53 | Then run the command line. Different versions of Windows 54 | do this differently: 55 |
63 | You will find yourself in a text window with a prompt that 64 | tells you what folder you are currently "in".
65 |
66 | Windows Vista and Windows-7: C:\Users\csev
67 | Windows XP: C:\Documents and Settings\csev
68 |
69 | This is your "home directory". Now we need to move into 70 | the folder where you have saved your Python program using 71 | the following commands: 72 |

 73 | C:\Users\csev\> cd Desktop
 74 | C:\Users\csev\Desktop> cd py4inf
 75 | 
Then type 76 |

 77 | C:\Users\csev\Desktop\py4inf> dir 
 78 | 
To list your files. You should see the prog1.py when 79 | you type the dir command.
80 |
81 | To run your program, simply type the name of your file at the 82 | command prompt and press enter. 83 |

 84 | C:\Users\csev\Desktop\py4inf> prog1.py
 85 | Hello Chuck
 86 | C:\Users\csev\Desktop\py4inf> 
 87 | 
You can edit the file in NotePad++, save it and then switch back 88 | to the command line and execute the program again by typing 89 | the file name again at the command line prompt.
90 |
91 | If you get confused in the command line window - just close it 92 | and start a new one.
93 |
94 | Hint: You can also press the "up-arrow" in the command line to 95 | scroll back and run a previously entered command again.
96 |
97 | You should also look in the preferences for NotePad++ and set it 98 | to expand tab characters to be four spaces. It will save you lots 99 | of effort looking for indentation errors.
100 |
101 | You can also find further information on editing and running 102 | Python programs at www.py4inf.com.

103 |
104 |
105 | Previous 106 | Up 107 | Next 108 | 109 | 110 | -------------------------------------------------------------------------------- /html-snap/cfbook018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook018.png -------------------------------------------------------------------------------- /html-snap/cfbook019.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | Python Programming on Macintosh 9 | 10 | 11 | 12 | Previous 13 | Up 14 | Next 15 |
16 |

Appendix B  Python Programming on Macintosh

17 | In this appendix, we walk through a series of steps 18 | so you can run Python on Macintosh. Since Python is 19 | already included in the Macintosh Operating system, we need to 20 | learn how to edit Python files and run Python programs 21 | in the terminal window.
22 |
23 | There are many approaches you can take to editing and running 24 | Python programs, and this is just one 25 | approach we have found to be very simple.
26 |
27 | First, you need to install a programmer's editor. You 28 | do not want to use TextEdit or Microsoft Word to edit 29 | Python programs. Programs must be in "flat-text" files 30 | and so you need an editor that is good at 31 | editing text files.
32 |
33 | Our recommended editor for Macintosh is TextWrangler which 34 | can be downloaded and installed from:
35 |
36 | http://www.barebones.com/products/TextWrangler/
37 |
38 | To create a Python program, run 39 | TextWrangler from your Applications folder.
40 |
41 | Let's make our first Python program be: 42 |

 43 | print 'Hello Chuck'
 44 | 
Except that you should change it to be your name. 45 | Let's save the file in a folder on your Desktop named 46 | py4inf. It is best to keep your folder names short 47 | and not to have any spaces in your folder or file name. 48 | Once you have made the folder, save the file 49 | into Desktop/py4inf/prog1.py.
50 |
51 | Then run the Terminal program. The easiest way is to 52 | press the Spotlight icon (the magnifying glass) in the upper 53 | right of your screen and enter "terminal" and launch the 54 | application that comes up.
55 |
56 | You start in your "home directory". You can see the current 57 | directory by typing the pwd command in the terminal window. 58 |

 59 | 67-194-80-15:~ csev$ pwd
 60 | /Users/csev
 61 | 67-194-80-15:~ csev$ 
 62 | 
We must be in the folder that contains your Python program 63 | to run the program. We use the cd command to move to a new 64 | folder and then the ls command to list the files in the 65 | folder. 66 |

 67 | 67-194-80-15:~ csev$ cd Desktop
 68 | 67-194-80-15:Desktop csev$ cd py4inf
 69 | 67-194-80-15:py4inf csev$ ls
 70 | prog1.py
 71 | 67-194-80-15:py4inf csev$ 
 72 | 
To run your program, simply type the python command followed 73 | by the name of your file at the 74 | command prompt and press enter. 75 |

 76 | 67-194-80-15:py4inf csev$ python prog1.py
 77 | Hello Chuck
 78 | 67-194-80-15:py4inf csev$ 
 79 | 
You can edit the file in TextWrangler, save it and then switch back 80 | to the command line and execute the program again by typing 81 | the python command and the file name again at the command line prompt.
82 |
83 | If you get confused in the command line window - just close it 84 | and start a new one.
85 |
86 | Hint: You can also press the "up-arrow" in the command line to 87 | scroll back and run a previously entered command again.
88 |
89 | You should also look in the preferences for TextWrangler and set it 90 | to expand tab characters to be four spaces. It will save you lots 91 | of effort looking for indentation errors.
92 |
93 | You can also find further information on editing and running 94 | Python programs at www.py4inf.com.

95 |
96 |
97 | Previous 98 | Up 99 | Next 100 | 101 | 102 | -------------------------------------------------------------------------------- /html-snap/cfbook019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook019.png -------------------------------------------------------------------------------- /html-snap/cfbook020.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | Contributor List 9 | 10 | 11 | 12 | Previous 13 | Up 14 | Next 15 |
16 |

Appendix C  Contributor List

17 | 18 |

Contributor List for "Python for Informatics"

19 | Bruce Shields for copy editing early drafts, 20 | Sarah Hegge, 21 | Steven Cherry, 22 | Sarah Kathleen Barbarow, 23 | Andrea Parker, 24 | Radaphat Chongthammakun, 25 | Megan Hixon, 26 | Kirby Urner, 27 | Sarah Kathleen Barbrow, 28 | Katie Kujala, 29 | Noah Botimer, 30 | Emily Alinder, 31 | Mark Thompson-Kular, 32 | James Perry, 33 | Eric Hofer, 34 | Eytan Adar, 35 | Peter Robinson, 36 | Deborah J. Nelson, 37 | Jonathan C. Anthony, 38 | Eden Rassette, 39 | Jeannette Schroeder, 40 | Justin Feezell, 41 | Chuanqi Li, 42 | Gerald Gordinier, 43 | Gavin Thomas Strassel, 44 | Ryan Clement, 45 | Alissa Talley, 46 | Caitlin Holman, 47 | Yong-Mi Kim, 48 | Karen Stover, 49 | Cherie Edmonds, 50 | Maria Seiferle, 51 | Romer Kristi D. Aranas (RK), 52 | Grant Boyer,
53 |
54 | 55 |

Contributor List for "Think Python"

56 | 57 | (Allen B. Downey)
58 |
59 | More than 100 sharp-eyed and thoughtful readers have sent in 60 | suggestions and corrections over the past few years. Their 61 | contributions, and enthusiasm for this project, have been a 62 | huge help.
63 |
64 | For the detail on the nature of each of the contributions from 65 | these individuals, see the "Think Python" text.
66 |
67 | Lloyd Hugh Allen, 68 | Yvon Boulianne, 69 | Fred Bremmer, 70 | Jonah Cohen, 71 | Michael Conlon, 72 | Benoit Girard, 73 | Courtney Gleason and Katherine Smith, 74 | Lee Harr, 75 | James Kaylin, 76 | David Kershaw, 77 | Eddie Lam, 78 | Man-Yong Lee, 79 | David Mayo, 80 | Chris McAloon, 81 | Matthew J. Moelter, 82 | Simon Dicon Montford, 83 | John Ouzts, 84 | Kevin Parks, 85 | David Pool, 86 | Michael Schmitt, 87 | Robin Shaw, 88 | Paul Sleigh, 89 | Craig T. Snydal, 90 | Ian Thomas, 91 | Keith Verheyden, 92 | Peter Winstanley, 93 | Chris Wrobel, 94 | Moshe Zadka, 95 | Christoph Zwerschke, 96 | James Mayer, 97 | Hayden McAfee, 98 | Angel Arnal, 99 | Tauhidul Hoque and Lex Berezhny, 100 | Dr. Michele Alzetta, 101 | Andy Mitchell, 102 | Kalin Harvey, 103 | Christopher P. Smith, 104 | David Hutchins, 105 | Gregor Lingl, 106 | Julie Peters, 107 | Florin Oprina, 108 | D. J. Webre, 109 | Ken, 110 | Ivo Wever, 111 | Curtis Yanko, 112 | Ben Logan, 113 | Jason Armstrong, 114 | Louis Cordier, 115 | Brian Cain, 116 | Rob Black, 117 | Jean-Philippe Rey at Ecole Centrale Paris, 118 | Jason Mader at George Washington University, 119 | Jan Gundtofte-Bruun, 120 | Abel David and Alexis Dinno, 121 | Charles Thayer, 122 | Roger Sperberg, 123 | Sam Bull, 124 | Andrew Cheung, 125 | C. Corey Capel, 126 | Alessandra, 127 | Wim Champagne, 128 | Douglas Wright, 129 | Jared Spindor, 130 | Lin Peiheng, 131 | Ray Hagtvedt, 132 | Torsten Hübsch, 133 | Inga Petuhhov, 134 | Arne Babenhauserheide, 135 | Mark E. 
Casida, 136 | Scott Tyler, 137 | Gordon Shephard, 138 | Andrew Turner, 139 | Adam Hobart, 140 | Daryl Hammond and Sarah Zimmerman, 141 | George Sass, 142 | Brian Bingham, 143 | Leah Engelbert-Fenton, 144 | Joe Funke, 145 | Chao-chao Chen, 146 | Jeff Paine, 147 | Lubos Pintes, 148 | Gregg Lind and Abigail Heithoff, 149 | Max Hailperin, 150 | Chotipat Pornavalai, 151 | Stanislaw Antol, 152 | Eric Pashman, 153 | Miguel Azevedo, 154 | Jianhua Liu, 155 | Nick King, 156 | Martin Zuther, 157 | Adam Zimmerman, 158 | Ratnakar Tiwari, 159 | Anurag Goel, 160 | Kelli Kratzer, 161 | Mark Griffiths, 162 | Roydan Ongie, 163 | Patryk Wolowiec, 164 | Mark Chonofsky, 165 | Russell Coleman, 166 | Wei Huang, 167 | Karen Barber, 168 | Nam Nguyen, 169 | Stéphane Morin, 170 | and 171 | Paul Stoop.

172 |
173 |
174 | Previous 175 | Up 176 | Next 177 | 178 | 179 | -------------------------------------------------------------------------------- /html-snap/cfbook020.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook020.png -------------------------------------------------------------------------------- /html-snap/cfbook021.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | 8 | Copyright Detail 9 | 10 | 11 | 12 | Previous 13 | Up 14 | Next 15 |
16 |

Appendix D  Copyright Detail

17 | This work is licensed under a 18 | Creative Commons 19 | Attribution-NonCommercial-ShareAlike 3.0 Unported License. 20 | This license is 21 | available at 22 | creativecommons.org/licenses/by-nc-sa/3.0/.
23 |
24 | I would have preferred to license the book under the less 25 | restrictive CC-BY-SA license. But unfortunately there are 26 | a few unscrupulous 27 | organizations who search for and find freely licensed books, 28 | and then publish and sell virtually unchanged copies of the books on a 29 | print on demand service such as LuLu or CreateSpace. CreateSpace 30 | has (thankfully) added a policy that gives the wishes of the actual 31 | copyright holder preference over a non-copyright holder attempting 32 | to publish a freely licensed work. Unfortunately there are many 33 | print-on-demand services and very few have as well-considered a policy 34 | as CreateSpace.
35 |
36 | Regretfully, I added the NC element to the license of 37 | this book to give me recourse in case someone tries to clone this 38 | book and sell it commercially. Unfortunately, adding NC limits uses 39 | of this material that I would like to permit. So I have added this 40 | section of the document to describe specific situations where 41 | I am giving my permission in advance to use the material in this book 42 | in situations that some might consider commercial. 43 |
55 | If you are intending to translate the book, you may want to contact me 56 | so we can make sure that you have all of the related course materials so 57 | you can translate them as well.
58 |
59 | Of course, you are welcome to contact me and ask for permission if these 60 | clauses are not sufficient. In all cases, permission to reuse and 61 | remix this material will be granted as long as there is clear added value 62 | or benefit to students or teachers that will accrue as a result of the 63 | new work.
64 |
65 | Charles Severance
66 | www.dr-chuck.com
67 | Ann Arbor, MI, USA
68 | September 9, 2013

69 |
70 |
71 | Previous 72 | Up 73 | Next 74 | 75 | 76 | -------------------------------------------------------------------------------- /html-snap/cfbook021.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook021.png -------------------------------------------------------------------------------- /html-snap/cfbook022.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook022.png -------------------------------------------------------------------------------- /html-snap/cfbook023.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook023.png -------------------------------------------------------------------------------- /html-snap/cfbook024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/cfbook024.png -------------------------------------------------------------------------------- /html-snap/contents_motif.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/contents_motif.gif -------------------------------------------------------------------------------- /html-snap/next_motif.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/next_motif.gif -------------------------------------------------------------------------------- /html-snap/previous_motif.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html-snap/previous_motif.gif -------------------------------------------------------------------------------- /html.sh: -------------------------------------------------------------------------------- 1 | # This needs netpbm and hevea from Darwin Ports 2 | 3 | rm -rf html 4 | mkdir html 5 | hevea -O -e latexonly png.hva htmlonly book 6 | # the following line is a kludge to prevent imagen from seeing 7 | # the definitions in latexonly 8 | grep -v latexonly book.image.tex > a; mv a book.image.tex 9 | imagen -png book 10 | hacha book.html 11 | mv index.html book.css book*.html book*.png book*.gif *motif.gif html 12 | rm book.haux book.hind book.htoc book.image.tex 13 | 14 | echo " " 15 | echo "Patching the HTML ..." 16 | python fixhtml.py 17 | 18 | -------------------------------------------------------------------------------- /html_270.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/csev/py4inf/e68273927aeb0decbe6b24703de6c30494f0fc55/html_270.zip -------------------------------------------------------------------------------- /htmlonly: -------------------------------------------------------------------------------- 1 | % put commands here that should be used for the HTML 2 | % version of the book but not Postscript or PDF 3 | 4 | \newcommand{\beforefig}{} 5 | \newcommand{\afterfig}{} 6 | 7 | \newcommand{\beforeverb}{\blue \large} 8 | \newcommand{\afterverb}{\black \normalsize} 9 | 10 | \newcommand{\adjustpage}[1]{} 11 | 12 | \newcommand{\clearemptydoublepage}{} 13 | \newcommand{\blankpage}{} 14 | 15 | \newcommand{\spacing}{} 16 | \newcommand{\endspacing}{} 17 | 18 | \newcommand{\frontmatter}{} 19 | \newcommand{\mainmatter}{} 20 | 21 | \newcommand{\theoremstyle}[1]{} 22 | \newcommand{\newtheoremstyle}[1]{} 23 | 24 | 
\newcommand{\vfill}{} 25 | 26 | \newcommand{\textbackslash}{\char`\\} 27 | 28 | \htmlhead{\rawhtmlinput{header.html}} 29 | 30 | \htmlfoot{\rawhtmlinput{footer.html}} 31 | -------------------------------------------------------------------------------- /latexonly: -------------------------------------------------------------------------------- 1 | \sloppy 2 | %\setlength{\topmargin}{-0.375in} 3 | %\setlength{\oddsidemargin}{0.0in} 4 | %\setlength{\evensidemargin}{0.0in} 5 | 6 | % Uncomment these to center on 8.5 x 11 7 | %\setlength{\topmargin}{0.625in} 8 | %\setlength{\oddsidemargin}{0.875in} 9 | %\setlength{\evensidemargin}{0.875in} 10 | 11 | %\setlength{\textheight}{7.2in} 12 | 13 | \setlength{\headsep}{3ex} 14 | \setlength{\parindent}{0.0in} 15 | \setlength{\parskip}{1.7ex plus 0.5ex minus 0.5ex} 16 | \renewcommand{\baselinestretch}{1.02} 17 | 18 | % see LaTeX Companion page 62 19 | \setlength{\topsep}{-0.0\parskip} 20 | \setlength{\partopsep}{-0.5\parskip} 21 | \setlength{\itemindent}{0.0in} 22 | \setlength{\listparindent}{0.0in} 23 | 24 | % see LaTeX Companion page 26 25 | % these are copied from /usr/local/teTeX/share/texmf/tex/latex/base/book.cls 26 | % all I changed is afterskip 27 | 28 | \makeatletter 29 | 30 | \renewcommand{\section}{\@startsection 31 | {section} {1} {0mm}% 32 | {-3.5ex \@plus -1ex \@minus -.2ex}% 33 | {0.7ex \@plus.2ex}% 34 | {\normalfont\Large\bfseries}} 35 | \renewcommand\subsection{\@startsection {subsection}{2}{0mm}% 36 | {-3.25ex\@plus -1ex \@minus -.2ex}% 37 | {0.3ex \@plus .2ex}% 38 | {\normalfont\large\bfseries}} 39 | \renewcommand\subsubsection{\@startsection {subsubsection}{3}{0mm}% 40 | {-3.25ex\@plus -1ex \@minus -.2ex}% 41 | {0.3ex \@plus .2ex}% 42 | {\normalfont\normalsize\bfseries}} 43 | 44 | % The following line adds a little extra space to the column 45 | % in which the Section numbers appear in the table of contents 46 | \renewcommand{\l@section}{\@dottedtocline{1}{1.5em}{3.0em}} 47 | \setcounter{tocdepth}{1} 48 | 49 | 
\makeatother 50 | 51 | \newcommand{\beforefig}{\vspace{1.3\parskip}} 52 | \newcommand{\afterfig}{\vspace{-0.2\parskip}} 53 | 54 | \newcommand{\beforeverb}{\vspace{0.6\parskip\fontsize{9}{11}}} 55 | \newcommand{\afterverb}{\vspace{0.6\parskip\normalsize}} 56 | 57 | \newcommand{\adjustpage}[1]{\enlargethispage{#1\baselineskip}} 58 | 59 | 60 | % Note: the following command seems to cause problems for Acroreader 61 | % on Windows, so for now I am overriding it. 62 | %\newcommand{\clearemptydoublepage}{ 63 | % \newpage{\pagestyle{empty}\cleardoublepage}} 64 | \newcommand{\clearemptydoublepage}{\cleardoublepage} 65 | 66 | %\newcommand{\blankpage}{\pagestyle{empty}\vspace*{1in}\newpage} 67 | \newcommand{\blankpage}{\vspace*{1in}\newpage} 68 | 69 | % HEADERS 70 | 71 | \renewcommand{\chaptermark}[1]{\markboth{#1}{}} 72 | \renewcommand{\sectionmark}[1]{\markright{\thesection\ #1}{}} 73 | 74 | \lhead[\fancyplain{}{\bfseries\thepage}]% 75 | {\fancyplain{}{\bfseries\rightmark}} 76 | \rhead[\fancyplain{}{\bfseries\leftmark}]% 77 | {\fancyplain{}{\bfseries\thepage}} 78 | \cfoot{} 79 | 80 | \pagestyle{fancyplain} 81 | 82 | 83 | % turn off the rule under the header 84 | %\setlength{\headrulewidth}{0pt} 85 | 86 | % the following is a brute-force way to prevent the headers 87 | % from getting transformed into all-caps 88 | \renewcommand\MakeUppercase{} 89 | 90 | % Exercise environment 91 | \newtheoremstyle{myex}% name 92 | {9pt}% Space above 93 | {9pt}% Space below 94 | {}% Body font 95 | {}% Indent amount (empty = no indent, \parindent = para indent) 96 | {\bfseries}% Thm head font 97 | {}% Punctuation after thm head 98 | {0.5em}% Space after thm head: " " = normal interword space; 99 | % \newline = linebreak 100 | {}% Thm head spec (can be left empty, meaning `normal') 101 | 102 | \theoremstyle{myex} 103 | -------------------------------------------------------------------------------- /notes/allen-downey-permission.txt: 
-------------------------------------------------------------------------------- 1 | From: Allen Downey 2 | Date: January 8, 2010 11:46:16 AM EST 3 | To: csev 4 | Cc: Jeff Elkner 5 | Subject: Re: I will send you a copy of "Python for Informatics" 6 | 7 | Hi Chuck, 8 | 9 | 1) Thanks for sending me a copy.  The best address for me is 10 | .............. Needham MA 02492. 11 | 12 | 2) About the FDL requirement to include the entire license, 13 | I agree that it is a nuisance, and I hereby grant you permission to 14 | produce a modified version of Think Python subject to the terms of 15 | the GNU FDL but excluding the requirement to include the entire 16 | license. 17 | 18 | 3) I am the sole copyright holder on Think Python, so I hereby grant you permission to produce a modified version of Think Python under the Creative Commons Attribution-Share Alike license (whichever version you like). 19 | 20 | (BTW, Green Tea Press is me) 21 | 22 | Good luck with your class! 23 | 24 | Allen 25 | 26 | -------------------------------------------------------------------------------- /png.hva: -------------------------------------------------------------------------------- 1 | \renewcommand{\heveaimageext}{.png} -------------------------------------------------------------------------------- /spanish.hva: -------------------------------------------------------------------------------- 1 | 2 | %%%% Hevea support for babel option 'spanish'. 3 | 4 | %%%% Resets 5 | %%%% a) date format 6 | %%%% b) names of various part descriptors (contentsname etc.) 
%
% \spanish@quotes: define the babel-style quotation-mark commands.
% Each command prints one character via \@print@u; the numeric arguments
% look like decimal Unicode code points (171 = left guillemet,
% 187 = right guillemet, 8222/8220 = low/high double quotes, ...)
% -- NOTE(review): confirm against the hevea sources.
\newcommand{\spanish@quotes}
{\def{\flqq}{\@print@u{0171}}%
\def{\glqq}{\@print@u{8222}}%
\def{\grqq}{\@print@u{8220}}%
\def{\glq}{\@print@u{8218}}%
\def{\grq}{\@print@u{8216}}%
\def{\frqq}{\@print@u{0187}}%
\def{\flq}{\@print@u{8249}}%
\def{\frq}{\@print@u{8250}}%
\let\og\flqq\let\fg\frqq%
}%
% \spanish@babel: install the quotes above, the Spanish month names,
% the \today format and the Spanish names of the document parts.
% Accented letters are written with the \'x accent macro throughout.
\newcommand{\spanish@babel}{
\spanish@quotes%
\let\spanish@day\default@day%
\def\csname f@month1\endcsname{enero}%
\def\csname f@month2\endcsname{febrero}%
\def\csname f@month3\endcsname{marzo}%
\def\csname f@month4\endcsname{abril}%
\def\csname f@month5\endcsname{mayo}%
\def\csname f@month6\endcsname{junio}%
\def\csname f@month7\endcsname{julio}%
\def\csname f@month8\endcsname{agosto}%
\def\csname f@month9\endcsname{septiembre}%
\def\csname f@month10\endcsname{octubre}%
\def\csname f@month11\endcsname{noviembre}%
\def\csname f@month12\endcsname{diciembre}%
\def\spanish@month{\csname f@month\arabic{month}\endcsname}%
\@ifundefined{theyear}{}{\def\today{\theday~\spanish@month~\theyear}}%
\def\prefacename{Prefacio}%
\def\refname{Referencias}%
\def\abstractname{Abstracto}%
\def\bibname{Bibliograf\'ia}%
\def\chaptername{Cap\'itulo}%
\def\appendixname{Ap\'endice}%
\def\contentsname{Contenido}%
\def\listfigurename{Lista de Figuras}%
\def\listtablename{Lista de Tablas}%
\def\indexname{\'Indice}%
\def\figurename{Figura}%
\def\tablename{Tabla}%
\def\partname{Parte}%
\def\enclname{adjunto}%
\def\ccname{copia para}%
\def\headtoname{Para}%
\def\pagename{P\'agina}%
\def\headpagename{P\'agina}%
\def\seename{ver}%
\def\alsoseename{ver tambi\'en}%
\def\footertext{Este documento ha sido traducido desde \LaTeX{} por
\footahref{\heveaurl/index.html}{\hevea}.}%
}

# typos.py -- scan a text file for two kinds of likely typos and print
# each hit prefixed with its 1-based line number:
#   1. words that start with two capitals followed by a lowercase
#      letter (e.g. "THis");
#   2. the same word twice in a row (compared case-insensitively,
#      and also across a line break).
from __future__ import print_function

# Runs unchanged on Python 2 and Python 3: pick whichever line-reading
# builtin exists.
try:
    read_answer = raw_input  # Python 2
except NameError:
    read_answer = input  # Python 3

fn = read_answer("Enter name: ")

# The previous word seen; deliberately kept across lines so that a word
# repeated at a line wrap is still reported.
lastword = None

# "with" guarantees the file is closed, even if the scan is interrupted.
with open(fn) as fh:
    for count, line in enumerate(fh, 1):
        for word in line.split():
            # Check 1: "UPper" style capitalisation slip.
            # NOTE(review): only three characters are inspected but the
            # length test requires more than three, so three-letter
            # words are skipped -- possibly on purpose; confirm.
            if (len(word) > 3
                    and 'A' <= word[0] <= 'Z'
                    and 'A' <= word[1] <= 'Z'
                    and 'a' <= word[2] <= 'z'):
                print(count, word)

            # Check 2: repeated word. One-character tokens and tokens
            # that do not start with a letter are never reported, but
            # they still become "the previous word", so they break a
            # run of repeats.
            if len(word) >= 2:
                word = word.lower()
                if 'a' <= word[0] <= 'z' and lastword == word:
                    print(count, word, lastword)
            lastword = word